From 12c505ead92c0cbb2ad3edab0d91d98b6f62a5af Mon Sep 17 00:00:00 2001 From: Joyee Cheung Date: Thu, 29 Jan 2026 03:30:37 +0100 Subject: [PATCH] deps,build,test: fix array index hash collision This enables v8_enable_seeded_array_index_hash and adds a test for it. Fixes: https://hackerone.com/reports/3511792 deps: V8: backport 0a8b1cdcc8b2 Original commit message: implement rapidhash secret generation Bug: 409717082 Change-Id: I471f33d66de32002f744aeba534c1d34f71e27d2 Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/6733490 Reviewed-by: Leszek Swirski Commit-Queue: snek Cr-Commit-Position: refs/heads/main@{#101499} Refs: https://github.com/v8/v8/commit/0a8b1cdcc8b243c62cf045fa8beb50600e11758a Co-authored-by: Joyee Cheung deps: V8: backport 185f0fe09b72 Original commit message: [numbers] Refactor HashSeed as a lightweight view over ByteArray Instead of copying the seed and secrets into a struct with value fields, HashSeed now stores a pointer pointing either into the read-only ByteArray, or the static default seed for off-heap HashSeed::Default() calls. The underlying storage is always 8-byte aligned so we can cast it directly into a struct. Change-Id: I5896a7f2ae24296eb4c80b757a5d90ac70a34866 Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/7609720 Reviewed-by: Leszek Swirski Commit-Queue: Joyee Cheung Cr-Commit-Position: refs/heads/main@{#105531} Refs: https://github.com/v8/v8/commit/185f0fe09b72fb869fdcf9a89f40ff2295436bca Co-authored-by: Joyee Cheung deps: V8: backport 1361b2a49d02 Original commit message: [strings] improve array index hash distribution Previously, the hashes stored in a Name's raw_hash_field for decimal numeric strings (potential array indices) consist of the literal integer value along with the length of the string. This means consecutive numeric strings can have consecutive hash values, which can lead to O(n^2) probing for insertion in the worst case when e.g. 
a non-numeric string happen to land in the these buckets. This patch adds a build-time flag v8_enable_seeded_array_index_hash that scrambles the 24-bit array-index value stored in a Name's raw_hash_field to improve the distribution. x ^= x >> kShift; x = (x * m1) & kMask; // round 1 x ^= x >> kShift; x = (x * m2) & kMask; // round 2 x ^= x >> kShift; // finalize To decode, apply the same steps with the modular inverses of m1 and m2 in reverse order. x ^= x >> kShift; x = (x * m2_inv) & kMask; // round 1 x ^= x >> kShift; x = (x * m1_inv) & kMask; // round 2 x ^= x >> kShift; // finalize where kShift = kArrayIndexValueBits / 2, kMask = kArrayIndexValueMask, m1, m2 (both odd) are the lower bits of the rapidhash secrets, m1_inv, m2_inv (modular inverses) are precomputed modular inverse of m1 and m2. The pre-computed values are appended to the hash_seed ByteArray in ReadOnlyRoots and accessed in generated code to reduce overhead. In call sites that don't already have access to the seeds, we read them from the current isolate group/isolate's read only roots. To consolidate the code that encode/decode these hashes, this patch adds MakeArrayIndexHash/DecodeArrayIndexFromHashField in C++ and CSA that perform seeding/unseeding if enabled, and updates places where encoding/decoding of array index is needed to use them. Bug: 477515021 Change-Id: I350afe511951a54c4378396538152cc56565fd55 Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/7564330 Reviewed-by: Leszek Swirski Commit-Queue: Joyee Cheung Cr-Commit-Position: refs/heads/main@{#105596} Refs: https://github.com/v8/v8/commit/1361b2a49d020a718dc5495713eae0fa67d697b9 Co-authored-by: Joyee Cheung deps: V8: cherry-pick aac14dd95e5b Original commit message: [string] add 3rd round to seeded array index hash Since we already have 3 derived secrets, and arithmetics are relatively cheap, add a 3rd round to the xorshift-multiply seeding scheme. This brings the bias from ~3.4 to ~0.4. 
Bug: 477515021 Change-Id: I1ef48954bcee8768d8c90db06ac8adb02f06cebf Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/7655117 Reviewed-by: Chengzhong Wu Commit-Queue: Joyee Cheung Reviewed-by: Leszek Swirski Cr-Commit-Position: refs/heads/main@{#105824} Refs: https://github.com/v8/v8/commit/aac14dd95e5be0d487eba6bcdaf9cef4f8bd806c PR-URL: https://github.com/nodejs-private/node-private/pull/834 CVE-ID: CVE-2026-21717 Gbp-Pq: Topic sec Gbp-Pq: Name 51-fix-array-index-hash-collision.patch --- common.gypi | 2 +- deps/v8/.gitignore | 1 + deps/v8/BUILD.bazel | 10 + deps/v8/BUILD.gn | 18 +- deps/v8/src/DEPS | 11 +- deps/v8/src/ast/ast-value-factory.cc | 6 +- deps/v8/src/ast/ast-value-factory.h | 13 +- deps/v8/src/builtins/number.tq | 4 +- deps/v8/src/codegen/code-stub-assembler.cc | 67 +++- deps/v8/src/codegen/code-stub-assembler.h | 6 + deps/v8/src/codegen/external-reference.cc | 3 +- deps/v8/src/debug/debug-interface.cc | 3 +- deps/v8/src/heap/factory-base.cc | 13 +- deps/v8/src/heap/factory-base.h | 6 +- deps/v8/src/heap/factory.cc | 3 +- deps/v8/src/heap/heap.cc | 13 - deps/v8/src/heap/heap.h | 3 - deps/v8/src/heap/read-only-heap-inl.h | 12 +- deps/v8/src/heap/read-only-heap.h | 5 + deps/v8/src/heap/setup-heap-internal.cc | 7 +- deps/v8/src/json/json-parser.cc | 1 - deps/v8/src/json/json-parser.h | 1 - deps/v8/src/numbers/hash-seed-inl.h | 56 ++-- deps/v8/src/numbers/hash-seed.cc | 114 +++++++ deps/v8/src/numbers/hash-seed.h | 102 +++++++ deps/v8/src/objects/dictionary-inl.h | 4 +- deps/v8/src/objects/name.h | 14 +- deps/v8/src/objects/name.tq | 41 ++- deps/v8/src/objects/objects.cc | 16 - deps/v8/src/objects/string-inl.h | 15 +- deps/v8/src/objects/string-table.cc | 8 +- deps/v8/src/objects/string.cc | 14 +- deps/v8/src/parsing/parse-info.h | 7 +- .../profiler/heap-snapshot-generator-inl.h | 3 +- deps/v8/src/profiler/strings-storage.cc | 2 +- deps/v8/src/runtime/runtime.cc | 4 +- .../v8/src/snapshot/read-only-deserializer.cc | 3 +- 
.../src/snapshot/shared-heap-deserializer.cc | 3 +- deps/v8/src/strings/string-hasher-inl.h | 89 +++++- deps/v8/src/strings/string-hasher.h | 60 +++- deps/v8/src/torque/torque-parser.cc | 5 + deps/v8/src/utils/utils.h | 2 - deps/v8/src/wasm/wasm-module.cc | 2 +- deps/v8/third_party/rapidhash-v8/LICENSE | 34 +++ deps/v8/third_party/rapidhash-v8/OWNERS | 2 + .../third_party/rapidhash-v8/README.chromium | 20 ++ deps/v8/third_party/rapidhash-v8/rapidhash.h | 285 ++++++++++++++++++ deps/v8/third_party/rapidhash-v8/secret.h | 198 ++++++++++++ test/fixtures/array-hash-collision.js | 27 ++ test/pummel/test-array-hash-collision.js | 27 ++ tools/make-v8.sh | 2 + tools/v8_gypfiles/features.gypi | 6 + 52 files changed, 1231 insertions(+), 142 deletions(-) create mode 100644 deps/v8/src/numbers/hash-seed.cc create mode 100644 deps/v8/src/numbers/hash-seed.h create mode 100644 deps/v8/third_party/rapidhash-v8/LICENSE create mode 100644 deps/v8/third_party/rapidhash-v8/OWNERS create mode 100644 deps/v8/third_party/rapidhash-v8/README.chromium create mode 100644 deps/v8/third_party/rapidhash-v8/rapidhash.h create mode 100644 deps/v8/third_party/rapidhash-v8/secret.h create mode 100644 test/fixtures/array-hash-collision.js create mode 100644 test/pummel/test-array-hash-collision.js diff --git a/common.gypi b/common.gypi index d3c17d47b..f61aee667 100644 --- a/common.gypi +++ b/common.gypi @@ -36,7 +36,7 @@ # Reset this number to 0 on major V8 upgrades. # Increment by one for each non-official patch applied to deps/v8. 
- 'v8_embedder_string': '-node.26', + 'v8_embedder_string': '-node.38', ##### V8 defaults for Node.js ##### diff --git a/deps/v8/.gitignore b/deps/v8/.gitignore index ed10b5227..3a57f17f1 100644 --- a/deps/v8/.gitignore +++ b/deps/v8/.gitignore @@ -78,6 +78,7 @@ !/third_party/googletest/src/googletest/include/gtest /third_party/googletest/src/googletest/include/gtest/* !/third_party/googletest/src/googletest/include/gtest/gtest_prod.h +!/third_party/rapidhash-v8 !/third_party/test262-harness !/third_party/v8 !/third_party/wasm-api diff --git a/deps/v8/BUILD.bazel b/deps/v8/BUILD.bazel index 81a9286d2..043b45fa1 100644 --- a/deps/v8/BUILD.bazel +++ b/deps/v8/BUILD.bazel @@ -164,6 +164,11 @@ v8_flag( default = False, ) +v8_flag( + name = "v8_enable_seeded_array_index_hash", + default = False, +) + v8_int( name = "v8_typed_array_max_size_in_heap", default = 64, @@ -339,6 +344,7 @@ v8_config( "v8_enable_verify_heap": "VERIFY_HEAP", "v8_enable_verify_predictable": "VERIFY_PREDICTABLE", "v8_enable_webassembly": "V8_ENABLE_WEBASSEMBLY", + "v8_enable_seeded_array_index_hash": "V8_ENABLE_SEEDED_ARRAY_INDEX_HASH", "v8_jitless": "V8_JITLESS", }, defines = [ @@ -1685,6 +1691,8 @@ filegroup( "src/numbers/conversions-inl.h", "src/numbers/conversions.cc", "src/numbers/conversions.h", + "src/numbers/hash-seed.h", + "src/numbers/hash-seed.cc", "src/numbers/hash-seed-inl.h", "src/numbers/integer-literal-inl.h", "src/numbers/integer-literal.h", @@ -2245,6 +2253,8 @@ filegroup( "src/execution/pointer-authentication-dummy.h", "src/heap/third-party/heap-api.h", "src/heap/third-party/heap-api-stub.cc", + "third_party/rapidhash-v8/rapidhash.h", + "third_party/rapidhash-v8/secret.h", ] + select({ "@v8//bazel/config:v8_target_ia32": [ "src/baseline/ia32/baseline-assembler-ia32-inl.h", diff --git a/deps/v8/BUILD.gn b/deps/v8/BUILD.gn index c42d5b862..80774d3a2 100644 --- a/deps/v8/BUILD.gn +++ b/deps/v8/BUILD.gn @@ -399,6 +399,12 @@ declare_args() { # iOS (non-simulator) does not have 
executable pages for 3rd party # applications yet so disable jit. v8_jitless = v8_enable_lite_mode || target_is_ios_device + + # Use a hard-coded secret value when hashing. + v8_use_default_hasher_secret = true + + # Enable seeded array index hash. + v8_enable_seeded_array_index_hash = false } # Derived defaults. @@ -802,6 +808,7 @@ config("external_startup_data") { # List of defines that can appear in externally visible header files and that # are controlled by args.gn. external_v8_defines = [ + "V8_USE_DEFAULT_HASHER_SECRET=${v8_use_default_hasher_secret}", "V8_ENABLE_CHECKS", "V8_COMPRESS_POINTERS", "V8_COMPRESS_POINTERS_IN_SHARED_CAGE", @@ -818,7 +825,9 @@ external_v8_defines = [ "V8_ENABLE_CONSERVATIVE_STACK_SCANNING", ] -enabled_external_v8_defines = [] +enabled_external_v8_defines = [ + "V8_USE_DEFAULT_HASHER_SECRET=${v8_use_default_hasher_secret}", +] if (v8_enable_v8_checks) { enabled_external_v8_defines += [ "V8_ENABLE_CHECKS" ] @@ -970,6 +979,9 @@ config("features") { if (v8_enable_lite_mode) { defines += [ "V8_LITE_MODE" ] } + if (v8_enable_seeded_array_index_hash) { + defines += [ "V8_ENABLE_SEEDED_ARRAY_INDEX_HASH" ] + } if (v8_enable_gdbjit) { defines += [ "ENABLE_GDB_JIT_INTERFACE" ] } @@ -3381,6 +3393,7 @@ v8_header_set("v8_internal_headers") { "src/numbers/conversions-inl.h", "src/numbers/conversions.h", "src/numbers/hash-seed-inl.h", + "src/numbers/hash-seed.h", "src/numbers/math-random.h", "src/objects/all-objects-inl.h", "src/objects/allocation-site-inl.h", @@ -3710,6 +3723,8 @@ v8_header_set("v8_internal_headers") { "src/temporal/temporal-parser.h", "src/third_party/siphash/halfsiphash.h", "src/third_party/utf8-decoder/utf8-decoder.h", + "third_party/rapidhash-v8/rapidhash.h", + "third_party/rapidhash-v8/secret.h", "src/torque/runtime-macro-shims.h", "src/tracing/trace-event.h", "src/tracing/traced-value.h", @@ -4872,6 +4887,7 @@ v8_source_set("v8_base_without_compiler") { "src/logging/runtime-call-stats.cc", "src/logging/tracing-flags.cc", 
"src/numbers/conversions.cc", + "src/numbers/hash-seed.cc", "src/numbers/math-random.cc", "src/objects/backing-store.cc", "src/objects/bigint.cc", diff --git a/deps/v8/src/DEPS b/deps/v8/src/DEPS index ebe2dd0dc..c21333bc8 100644 --- a/deps/v8/src/DEPS +++ b/deps/v8/src/DEPS @@ -100,5 +100,14 @@ specific_include_rules = { ], "etw-jit-metadata-win\.h": [ "+src/libplatform/etw/etw-provider-win.h", - ] + ], + "string-hasher-inl\.h": [ + "+third_party/rapidhash-v8/rapidhash.h", + ], + "heap\.cc": [ + "+third_party/rapidhash-v8/secret.h", + ], + "hash-seed\.cc": [ + "+third_party/rapidhash-v8/secret.h", + ], } diff --git a/deps/v8/src/ast/ast-value-factory.cc b/deps/v8/src/ast/ast-value-factory.cc index 6cf5796c6..642a194d5 100644 --- a/deps/v8/src/ast/ast-value-factory.cc +++ b/deps/v8/src/ast/ast-value-factory.cc @@ -82,7 +82,8 @@ bool AstRawString::AsArrayIndex(uint32_t* index) const { // can't be convertible to an array index. if (!IsIntegerIndex()) return false; if (length() <= Name::kMaxCachedArrayIndexLength) { - *index = Name::ArrayIndexValueBits::decode(raw_hash_field_); + *index = StringHasher::DecodeArrayIndexFromHashField( + raw_hash_field_, HashSeed(ReadOnlyHeap::GetReadOnlyRoots())); return true; } // Might be an index, but too big to cache it. Do the slow conversion. 
This @@ -291,7 +292,8 @@ std::forward_list AstConsString::ToRawStrings() const { return result; } -AstStringConstants::AstStringConstants(Isolate* isolate, uint64_t hash_seed) +AstStringConstants::AstStringConstants(Isolate* isolate, + const HashSeed hash_seed) : zone_(isolate->allocator(), ZONE_NAME), string_table_(), hash_seed_(hash_seed) { diff --git a/deps/v8/src/ast/ast-value-factory.h b/deps/v8/src/ast/ast-value-factory.h index f407f83e8..f05c86d19 100644 --- a/deps/v8/src/ast/ast-value-factory.h +++ b/deps/v8/src/ast/ast-value-factory.h @@ -35,6 +35,7 @@ #include "src/common/globals.h" #include "src/handles/handles.h" #include "src/numbers/conversions.h" +#include "src/numbers/hash-seed.h" #include "src/objects/name.h" // Ast(Raw|Cons)String and AstValueFactory are for storing strings and @@ -290,7 +291,7 @@ using AstRawStringMap = class AstStringConstants final { public: - AstStringConstants(Isolate* isolate, uint64_t hash_seed); + AstStringConstants(Isolate* isolate, const HashSeed hash_seed); AstStringConstants(const AstStringConstants&) = delete; AstStringConstants& operator=(const AstStringConstants&) = delete; @@ -299,13 +300,13 @@ class AstStringConstants final { AST_STRING_CONSTANTS(F) #undef F - uint64_t hash_seed() const { return hash_seed_; } + const HashSeed hash_seed() const { return hash_seed_; } const AstRawStringMap* string_table() const { return &string_table_; } private: Zone zone_; AstRawStringMap string_table_; - uint64_t hash_seed_; + const HashSeed hash_seed_; #define F(name, str) AstRawString* name##_string_; AST_STRING_CONSTANTS(F) @@ -315,12 +316,12 @@ class AstStringConstants final { class AstValueFactory { public: AstValueFactory(Zone* zone, const AstStringConstants* string_constants, - uint64_t hash_seed) + const HashSeed hash_seed) : AstValueFactory(zone, zone, string_constants, hash_seed) {} AstValueFactory(Zone* ast_raw_string_zone, Zone* single_parse_zone, const AstStringConstants* string_constants, - uint64_t hash_seed) + 
const HashSeed hash_seed) : string_table_(string_constants->string_table()), strings_(nullptr), strings_end_(&strings_), @@ -418,7 +419,7 @@ class AstValueFactory { Zone* ast_raw_string_zone_; Zone* single_parse_zone_; - uint64_t hash_seed_; + const HashSeed hash_seed_; }; extern template EXPORT_TEMPLATE_DECLARE( diff --git a/deps/v8/src/builtins/number.tq b/deps/v8/src/builtins/number.tq index 3d0122d60..7c7c2ecbd 100644 --- a/deps/v8/src/builtins/number.tq +++ b/deps/v8/src/builtins/number.tq @@ -298,7 +298,7 @@ transitioning javascript builtin NumberParseFloat( const hash: NameHash = s.raw_hash_field; if (IsIntegerIndex(hash) && hash.array_index_length < kMaxCachedArrayIndexLength) { - const arrayIndex: uint32 = hash.array_index_value; + const arrayIndex: uint32 = DecodeArrayIndexFromHashField(hash); return SmiFromUint32(arrayIndex); } // Fall back to the runtime to convert string to a number. @@ -349,7 +349,7 @@ transitioning builtin ParseInt(implicit context: Context)( const hash: NameHash = s.raw_hash_field; if (IsIntegerIndex(hash) && hash.array_index_length < kMaxCachedArrayIndexLength) { - const arrayIndex: uint32 = hash.array_index_value; + const arrayIndex: uint32 = DecodeArrayIndexFromHashField(hash); return SmiFromUint32(arrayIndex); } // Fall back to the runtime. diff --git a/deps/v8/src/codegen/code-stub-assembler.cc b/deps/v8/src/codegen/code-stub-assembler.cc index c5fdaac24..4d168237f 100644 --- a/deps/v8/src/codegen/code-stub-assembler.cc +++ b/deps/v8/src/codegen/code-stub-assembler.cc @@ -2185,6 +2185,66 @@ TNode CodeStubAssembler::LoadJSReceiverIdentityHash( return var_hash.value(); } +#ifdef V8_ENABLE_SEEDED_ARRAY_INDEX_HASH +// Mirror C++ StringHasher::SeedArrayIndexValue. +TNode CodeStubAssembler::SeedArrayIndexValue(TNode value) { + // Load m1, m2 and m3 from the hash seed byte array. In the compiled code + // these will always come from the read-only roots. 
+ TNode hash_seed = CAST(LoadRoot(RootIndex::kHashSeed)); + intptr_t base_offset = ByteArray::kHeaderSize - kHeapObjectTag; + TNode m1 = Load( + hash_seed, IntPtrConstant(base_offset + HashSeed::kDerivedM1Offset)); + TNode m2 = Load( + hash_seed, IntPtrConstant(base_offset + HashSeed::kDerivedM2Offset)); + TNode m3 = Load( + hash_seed, IntPtrConstant(base_offset + HashSeed::kDerivedM3Offset)); + + TNode x = value; + // 3-round xorshift-multiply. + x = Word32Xor(x, Word32Shr(x, Uint32Constant(Name::kArrayIndexHashShift))); + x = Word32And(Uint32Mul(Unsigned(x), m1), + Uint32Constant(Name::kArrayIndexValueMask)); + x = Word32Xor(x, Word32Shr(x, Uint32Constant(Name::kArrayIndexHashShift))); + x = Word32And(Uint32Mul(Unsigned(x), m2), + Uint32Constant(Name::kArrayIndexValueMask)); + x = Word32Xor(x, Word32Shr(x, Uint32Constant(Name::kArrayIndexHashShift))); + x = Word32And(Uint32Mul(Unsigned(x), m3), + Uint32Constant(Name::kArrayIndexValueMask)); + x = Word32Xor(x, Word32Shr(x, Uint32Constant(Name::kArrayIndexHashShift))); + + return Unsigned(x); +} + +// Mirror C++ StringHasher::UnseedArrayIndexValue. +TNode CodeStubAssembler::UnseedArrayIndexValue(TNode value) { + // Load m1_inv, m2_inv and m3_inv from the hash seed byte array. In the + // compiled code these will always come from the read-only roots. + TNode hash_seed = CAST(LoadRoot(RootIndex::kHashSeed)); + intptr_t base_offset = ByteArray::kHeaderSize - kHeapObjectTag; + TNode m1_inv = Load( + hash_seed, IntPtrConstant(base_offset + HashSeed::kDerivedM1InvOffset)); + TNode m2_inv = Load( + hash_seed, IntPtrConstant(base_offset + HashSeed::kDerivedM2InvOffset)); + TNode m3_inv = Load( + hash_seed, IntPtrConstant(base_offset + HashSeed::kDerivedM3InvOffset)); + + TNode x = value; + // 3-round xorshift-multiply (inverse). + // Xorshift is an involution when kShift is at least half of the value width. 
+ x = Word32Xor(x, Word32Shr(x, Uint32Constant(Name::kArrayIndexHashShift))); + x = Word32And(Uint32Mul(Unsigned(x), m3_inv), + Uint32Constant(Name::kArrayIndexValueMask)); + x = Word32Xor(x, Word32Shr(x, Uint32Constant(Name::kArrayIndexHashShift))); + x = Word32And(Uint32Mul(Unsigned(x), m2_inv), + Uint32Constant(Name::kArrayIndexValueMask)); + x = Word32Xor(x, Word32Shr(x, Uint32Constant(Name::kArrayIndexHashShift))); + x = Word32And(Uint32Mul(Unsigned(x), m1_inv), + Uint32Constant(Name::kArrayIndexValueMask)); + x = Word32Xor(x, Word32Shr(x, Uint32Constant(Name::kArrayIndexHashShift))); + return Unsigned(x); +} +#endif // V8_ENABLE_SEEDED_ARRAY_INDEX_HASH + TNode CodeStubAssembler::LoadNameHashAssumeComputed(TNode name) { TNode hash_field = LoadNameRawHash(name); CSA_DCHECK(this, IsClearWord32(hash_field, Name::kHashNotComputedMask)); @@ -7620,8 +7680,7 @@ TNode CodeStubAssembler::StringToNumber(TNode input) { GotoIf(IsSetWord32(raw_hash_field, Name::kDoesNotContainCachedArrayIndexMask), &runtime); - var_result = SmiTag(Signed( - DecodeWordFromWord32(raw_hash_field))); + var_result = SmiFromUint32(DecodeArrayIndexFromHashField(raw_hash_field)); Goto(&end); BIND(&runtime); @@ -8496,8 +8555,8 @@ void CodeStubAssembler::TryToName(TNode key, Label* if_keyisindex, BIND(&if_has_cached_index); { - TNode index = Signed( - DecodeWordFromWord32(raw_hash_field)); + TNode index = Signed(ChangeUint32ToWord( + DecodeArrayIndexFromHashField(raw_hash_field))); CSA_DCHECK(this, IntPtrLessThan(index, IntPtrConstant(INT_MAX))); *var_index = index; Goto(if_keyisindex); diff --git a/deps/v8/src/codegen/code-stub-assembler.h b/deps/v8/src/codegen/code-stub-assembler.h index fa41984c6..2f87b5c86 100644 --- a/deps/v8/src/codegen/code-stub-assembler.h +++ b/deps/v8/src/codegen/code-stub-assembler.h @@ -4293,6 +4293,12 @@ class V8_EXPORT_PRIVATE CodeStubAssembler TNode property_details, Label* needs_resize); +#ifdef V8_ENABLE_SEEDED_ARRAY_INDEX_HASH + // Mirror C++ 
StringHasher::SeedArrayIndexValue and UnseedArrayIndexValue. + TNode SeedArrayIndexValue(TNode value); + TNode UnseedArrayIndexValue(TNode value); +#endif // V8_ENABLE_SEEDED_ARRAY_INDEX_HASH + private: friend class CodeStubArguments; diff --git a/deps/v8/src/codegen/external-reference.cc b/deps/v8/src/codegen/external-reference.cc index 7990f5216..aeeac3e09 100644 --- a/deps/v8/src/codegen/external-reference.cc +++ b/deps/v8/src/codegen/external-reference.cc @@ -1096,7 +1096,8 @@ FUNCTION_REFERENCE(jsreceiver_create_identity_hash, static uint32_t ComputeSeededIntegerHash(Isolate* isolate, int32_t key) { DisallowGarbageCollection no_gc; - return ComputeSeededHash(static_cast(key), HashSeed(isolate)); + return ComputeSeededHash(static_cast(key), + HashSeed(isolate).seed()); } FUNCTION_REFERENCE(compute_integer_hash, ComputeSeededIntegerHash) diff --git a/deps/v8/src/debug/debug-interface.cc b/deps/v8/src/debug/debug-interface.cc index 259a6fd83..1f3307e7d 100644 --- a/deps/v8/src/debug/debug-interface.cc +++ b/deps/v8/src/debug/debug-interface.cc @@ -899,7 +899,8 @@ uint32_t WasmScript::GetFunctionHash(int function_index) { wire_bytes.GetFunctionBytes(&func); // TODO(herhut): Maybe also take module, name and signature into account. 
return i::StringHasher::HashSequentialString(function_bytes.begin(), - function_bytes.length(), 0); + function_bytes.length(), + internal::HashSeed::Default()); } int WasmScript::CodeOffset() const { diff --git a/deps/v8/src/heap/factory-base.cc b/deps/v8/src/heap/factory-base.cc index 13b712bf9..0554f30eb 100644 --- a/deps/v8/src/heap/factory-base.cc +++ b/deps/v8/src/heap/factory-base.cc @@ -229,7 +229,8 @@ Handle FactoryBase::NewWeakFixedArray( template Handle FactoryBase::NewByteArray(int length, - AllocationType allocation) { + AllocationType allocation, + AllocationAlignment alignment) { if (length < 0 || length > ByteArray::kMaxLength) { FATAL("Fatal JavaScript invalid size error %d", length); UNREACHABLE(); @@ -237,7 +238,7 @@ Handle FactoryBase::NewByteArray(int length, if (length == 0) return impl()->empty_byte_array(); int size = ALIGN_TO_ALLOCATION_ALIGNMENT(ByteArray::SizeFor(length)); HeapObject result = AllocateRawWithImmortalMap( - size, allocation, read_only_roots().byte_array_map()); + size, allocation, read_only_roots().byte_array_map(), alignment); DisallowGarbageCollection no_gc; ByteArray array = ByteArray::cast(result); array.set_length(length); @@ -972,7 +973,8 @@ inline Handle FactoryBase::SmiToString(Smi number, if (raw.raw_hash_field() == String::kEmptyHashField && number.value() >= 0) { uint32_t raw_hash_field = StringHasher::MakeArrayIndexHash( - static_cast(number.value()), raw.length()); + static_cast(number.value()), raw.length(), + HashSeed(read_only_roots())); raw.set_raw_hash_field(raw_hash_field); } } @@ -1123,8 +1125,9 @@ FactoryBase::AllocateRawTwoByteInternalizedString( template HeapObject FactoryBase::AllocateRawArray(int size, - AllocationType allocation) { - HeapObject result = AllocateRaw(size, allocation); + AllocationType allocation, + AllocationAlignment alignment) { + HeapObject result = AllocateRaw(size, allocation, alignment); if (!V8_ENABLE_THIRD_PARTY_HEAP_BOOL && (size > 
isolate()->heap()->AsHeap()->MaxRegularHeapObjectSize(allocation)) && diff --git a/deps/v8/src/heap/factory-base.h b/deps/v8/src/heap/factory-base.h index 9739d52e4..07912accf 100644 --- a/deps/v8/src/heap/factory-base.h +++ b/deps/v8/src/heap/factory-base.h @@ -159,7 +159,8 @@ class FactoryBase : public TorqueGeneratedFactory { // The function returns a pre-allocated empty byte array for length = 0. Handle NewByteArray( - int length, AllocationType allocation = AllocationType::kYoung); + int length, AllocationType allocation = AllocationType::kYoung, + AllocationAlignment alignment = kTaggedAligned); Handle NewBytecodeArray(int length, const byte* raw_bytecodes, int frame_size, int parameter_count, @@ -327,7 +328,8 @@ class FactoryBase : public TorqueGeneratedFactory { static constexpr int kNumberToStringBufferSize = 32; // Allocate memory for an uninitialized array (e.g., a FixedArray or similar). - HeapObject AllocateRawArray(int size, AllocationType allocation); + HeapObject AllocateRawArray(int size, AllocationType allocation, + AllocationAlignment alignment = kTaggedAligned); HeapObject AllocateRawFixedArray(int length, AllocationType allocation); HeapObject AllocateRawWeakArrayList(int length, AllocationType allocation); diff --git a/deps/v8/src/heap/factory.cc b/deps/v8/src/heap/factory.cc index c95286234..71217c5f5 100644 --- a/deps/v8/src/heap/factory.cc +++ b/deps/v8/src/heap/factory.cc @@ -3467,7 +3467,8 @@ Handle Factory::SizeToString(size_t value, bool check_cache) { if (value <= JSArray::kMaxArrayIndex && raw.raw_hash_field() == String::kEmptyHashField) { uint32_t raw_hash_field = StringHasher::MakeArrayIndexHash( - static_cast(value), raw.length()); + static_cast(value), raw.length(), + HashSeed(read_only_roots())); raw.set_raw_hash_field(raw_hash_field); } } diff --git a/deps/v8/src/heap/heap.cc b/deps/v8/src/heap/heap.cc index 90aa3360b..8ad0d68b2 100644 --- a/deps/v8/src/heap/heap.cc +++ b/deps/v8/src/heap/heap.cc @@ -5630,19 +5630,6 @@ void 
Heap::SetUpSpaces(LinearAllocationArea& new_allocation_info, heap_allocator_.Setup(); } -void Heap::InitializeHashSeed() { - DCHECK(!deserialization_complete_); - uint64_t new_hash_seed; - if (v8_flags.hash_seed == 0) { - int64_t rnd = isolate()->random_number_generator()->NextInt64(); - new_hash_seed = static_cast(rnd); - } else { - new_hash_seed = static_cast(v8_flags.hash_seed); - } - ReadOnlyRoots(this).hash_seed().copy_in( - 0, reinterpret_cast(&new_hash_seed), kInt64Size); -} - // static void Heap::InitializeOncePerProcess() { #ifdef V8_ENABLE_ALLOCATION_TIMEOUT diff --git a/deps/v8/src/heap/heap.h b/deps/v8/src/heap/heap.h index f4b7d4036..e9d6d3e62 100644 --- a/deps/v8/src/heap/heap.h +++ b/deps/v8/src/heap/heap.h @@ -822,9 +822,6 @@ class Heap { // Prepares the heap, setting up for deserialization. void InitializeMainThreadLocalHeap(LocalHeap* main_thread_local_heap); - // (Re-)Initialize hash seed from flag or RNG. - void InitializeHashSeed(); - // Invoked once for the process from V8::Initialize. static void InitializeOncePerProcess(); diff --git a/deps/v8/src/heap/read-only-heap-inl.h b/deps/v8/src/heap/read-only-heap-inl.h index ad027a7a0..30f18e03c 100644 --- a/deps/v8/src/heap/read-only-heap-inl.h +++ b/deps/v8/src/heap/read-only-heap-inl.h @@ -28,15 +28,21 @@ ReadOnlyRoots ReadOnlyHeap::EarlyGetReadOnlyRoots(HeapObject object) { // static ReadOnlyRoots ReadOnlyHeap::GetReadOnlyRoots(HeapObject object) { #ifdef V8_SHARED_RO_HEAP + return GetReadOnlyRoots(); +#else + return ReadOnlyRoots(GetHeapFromWritableObject(object)); +#endif // V8_SHARED_RO_HEAP +} + +#ifdef V8_SHARED_RO_HEAP +ReadOnlyRoots ReadOnlyHeap::GetReadOnlyRoots() { auto* shared_ro_heap = SoleReadOnlyHeap::shared_ro_heap_; // If this check fails in code that runs during initialization use // EarlyGetReadOnlyRoots instead. 
DCHECK(shared_ro_heap && shared_ro_heap->roots_init_complete()); return ReadOnlyRoots(shared_ro_heap->read_only_roots_); -#else - return ReadOnlyRoots(GetHeapFromWritableObject(object)); -#endif // V8_SHARED_RO_HEAP } +#endif // V8_SHARED_RO_HEAP } // namespace internal } // namespace v8 diff --git a/deps/v8/src/heap/read-only-heap.h b/deps/v8/src/heap/read-only-heap.h index 33a6c4a09..943fc04f1 100644 --- a/deps/v8/src/heap/read-only-heap.h +++ b/deps/v8/src/heap/read-only-heap.h @@ -76,6 +76,11 @@ class ReadOnlyHeap { // must be initialized V8_EXPORT_PRIVATE inline static ReadOnlyRoots GetReadOnlyRoots( HeapObject object); + +#ifdef V8_SHARED_RO_HEAP + V8_EXPORT_PRIVATE inline static ReadOnlyRoots GetReadOnlyRoots(); +#endif // V8_SHARED_RO_HEAP + // Returns the current isolates roots table during initialization as opposed // to the shared one in case the latter is not initialized yet. V8_EXPORT_PRIVATE inline static ReadOnlyRoots EarlyGetReadOnlyRoots( diff --git a/deps/v8/src/heap/setup-heap-internal.cc b/deps/v8/src/heap/setup-heap-internal.cc index 3f217e644..3a95eef99 100644 --- a/deps/v8/src/heap/setup-heap-internal.cc +++ b/deps/v8/src/heap/setup-heap-internal.cc @@ -13,6 +13,7 @@ #include "src/init/heap-symbols.h" #include "src/init/setup-isolate.h" #include "src/interpreter/interpreter.h" +#include "src/numbers/hash-seed.h" #include "src/objects/arguments.h" #include "src/objects/call-site-info.h" #include "src/objects/cell-inl.h" @@ -709,8 +710,10 @@ bool Heap::CreateImportantReadOnlyObjects() { // Hash seed for strings Factory* factory = isolate()->factory(); - set_hash_seed(*factory->NewByteArray(kInt64Size, AllocationType::kReadOnly)); - InitializeHashSeed(); + set_hash_seed(*factory->NewByteArray(HashSeed::kTotalSize, + AllocationType::kReadOnly, + AllocationAlignment::kDoubleAligned)); + HashSeed::InitializeRoots(isolate()); // Important strings and symbols for (const ConstantStringInit& entry : kImportantConstantStringTable) { diff --git 
a/deps/v8/src/json/json-parser.cc b/deps/v8/src/json/json-parser.cc index 4f8924d33..db729d2e6 100644 --- a/deps/v8/src/json/json-parser.cc +++ b/deps/v8/src/json/json-parser.cc @@ -310,7 +310,6 @@ bool JsonParseInternalizer::RecurseAndApply(Handle holder, template JsonParser::JsonParser(Isolate* isolate, Handle source) : isolate_(isolate), - hash_seed_(HashSeed(isolate)), object_constructor_(isolate_->object_function()), original_source_(source) { size_t start = 0; diff --git a/deps/v8/src/json/json-parser.h b/deps/v8/src/json/json-parser.h index 60eb13c83..c087c4b34 100644 --- a/deps/v8/src/json/json-parser.h +++ b/deps/v8/src/json/json-parser.h @@ -382,7 +382,6 @@ class JsonParser final { int position() const { return static_cast(cursor_ - chars_); } Isolate* isolate_; - const uint64_t hash_seed_; JsonToken next_; // Indicates whether the bytes underneath source_ can relocate during GC. bool chars_may_relocate_; diff --git a/deps/v8/src/numbers/hash-seed-inl.h b/deps/v8/src/numbers/hash-seed-inl.h index 0964d5524..e80dcab3a 100644 --- a/deps/v8/src/numbers/hash-seed-inl.h +++ b/deps/v8/src/numbers/hash-seed-inl.h @@ -5,48 +5,36 @@ #ifndef V8_NUMBERS_HASH_SEED_INL_H_ #define V8_NUMBERS_HASH_SEED_INL_H_ -#include - -// The #includes below currently lead to cyclic transitive includes, so -// HashSeed() ends up being required before it is defined, so we have to -// declare it here. This is a workaround; if we needed this permanently then -// we should put that line into a "hash-seed.h" header; but we won't need -// it for long. -// TODO(jkummerow): Get rid of this by breaking circular include dependencies. -namespace v8 { -namespace internal { - -class Isolate; -class LocalIsolate; -class ReadOnlyRoots; - -inline uint64_t HashSeed(Isolate* isolate); -inline uint64_t HashSeed(LocalIsolate* isolate); -inline uint64_t HashSeed(ReadOnlyRoots roots); - -} // namespace internal -} // namespace v8 - -// See comment above for why this isn't at the top of the file. 
+#include "src/numbers/hash-seed.h" #include "src/objects/fixed-array-inl.h" #include "src/roots/roots-inl.h" namespace v8 { namespace internal { -inline uint64_t HashSeed(Isolate* isolate) { - return HashSeed(ReadOnlyRoots(isolate)); -} +inline HashSeed::HashSeed(Isolate* isolate) + : HashSeed(ReadOnlyRoots(isolate)) {} + +inline HashSeed::HashSeed(LocalIsolate* isolate) + : HashSeed(ReadOnlyRoots(isolate)) {} + +inline HashSeed::HashSeed(ReadOnlyRoots roots) + : data_(reinterpret_cast( + roots.hash_seed().GetDataStartAddress())) {} + +inline HashSeed HashSeed::Default() { return HashSeed(kDefaultData); } -inline uint64_t HashSeed(LocalIsolate* isolate) { - return HashSeed(ReadOnlyRoots(isolate)); -} +inline uint64_t HashSeed::seed() const { return data_->seed; } +inline const uint64_t* HashSeed::secret() const { return data_->secrets; } -inline uint64_t HashSeed(ReadOnlyRoots roots) { - uint64_t seed; - roots.hash_seed().copy_out(0, reinterpret_cast(&seed), kInt64Size); - return seed; -} +#ifdef V8_ENABLE_SEEDED_ARRAY_INDEX_HASH +inline uint32_t HashSeed::m1() const { return data_->m1; } +inline uint32_t HashSeed::m1_inv() const { return data_->m1_inv; } +inline uint32_t HashSeed::m2() const { return data_->m2; } +inline uint32_t HashSeed::m2_inv() const { return data_->m2_inv; } +inline uint32_t HashSeed::m3() const { return data_->m3; } +inline uint32_t HashSeed::m3_inv() const { return data_->m3_inv; } +#endif // V8_ENABLE_SEEDED_ARRAY_INDEX_HASH } // namespace internal } // namespace v8 diff --git a/deps/v8/src/numbers/hash-seed.cc b/deps/v8/src/numbers/hash-seed.cc new file mode 100644 index 000000000..4251cc3ef --- /dev/null +++ b/deps/v8/src/numbers/hash-seed.cc @@ -0,0 +1,114 @@ +// Copyright 2026 the V8 project authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "src/numbers/hash-seed.h" + +#include + +#include "src/execution/isolate.h" +#include "src/flags/flags.h" +#include "src/numbers/hash-seed-inl.h" +#include "src/objects/name.h" +#include "third_party/rapidhash-v8/secret.h" + +namespace v8 { +namespace internal { + +namespace { + +#ifdef V8_ENABLE_SEEDED_ARRAY_INDEX_HASH +// Calculate the modular inverse using Newton's method. +constexpr uint32_t modular_inverse(uint32_t m) { + uint32_t x = (3 * m) ^ 2; // 5 correct bits + x = x * (2 - m * x); // 10 correct bits + x = x * (2 - m * x); // 20 correct bits + x = x * (2 - m * x); // 40 correct bits + return x; +} + +constexpr uint32_t truncate_for_derived_secrets(uint64_t s) { + return static_cast(s) & Name::kArrayIndexValueMask; +} + +// Derive a multiplier from a rapidhash secret and ensure it's odd. +constexpr uint32_t derive_multiplier(uint64_t secret) { + return truncate_for_derived_secrets(secret) | 1; +} + +// Compute the modular inverse of the derived multiplier. +constexpr uint32_t derive_multiplier_inverse(uint64_t secret) { + return truncate_for_derived_secrets( + modular_inverse(derive_multiplier(secret))); +} + +constexpr bool is_modular_inverse(uint32_t m, uint32_t m_inv) { + return ((m * m_inv) & Name::kArrayIndexValueMask) == 1; +} + +constexpr void DeriveSecretsForArrayIndexHash(HashSeed::Data* data) { + data->m1 = derive_multiplier(data->secrets[0]); + data->m1_inv = derive_multiplier_inverse(data->secrets[0]); + data->m2 = derive_multiplier(data->secrets[1]); + data->m2_inv = derive_multiplier_inverse(data->secrets[1]); + data->m3 = derive_multiplier(data->secrets[2]); + data->m3_inv = derive_multiplier_inverse(data->secrets[2]); +} +#endif // V8_ENABLE_SEEDED_ARRAY_INDEX_HASH + +static constexpr HashSeed::Data kDefaultSeed = [] { + HashSeed::Data d{}; + d.seed = 0; + d.secrets[0] = RAPIDHASH_DEFAULT_SECRET[0]; + d.secrets[1] = RAPIDHASH_DEFAULT_SECRET[1]; + d.secrets[2] = RAPIDHASH_DEFAULT_SECRET[2]; +#ifdef 
V8_ENABLE_SEEDED_ARRAY_INDEX_HASH + DeriveSecretsForArrayIndexHash(&d); +#endif // V8_ENABLE_SEEDED_ARRAY_INDEX_HASH + return d; +}(); + +} // anonymous namespace + +static_assert(HashSeed::kSecretsCount == arraysize(RAPIDHASH_DEFAULT_SECRET)); +const HashSeed::Data* const HashSeed::kDefaultData = &kDefaultSeed; + +#ifdef V8_ENABLE_SEEDED_ARRAY_INDEX_HASH +// Compile-time verification that m * m_inv === 1 for the derived secrets. +static_assert(is_modular_inverse(kDefaultSeed.m1, kDefaultSeed.m1_inv)); +static_assert(is_modular_inverse(kDefaultSeed.m2, kDefaultSeed.m2_inv)); +static_assert(is_modular_inverse(kDefaultSeed.m3, kDefaultSeed.m3_inv)); +#endif // V8_ENABLE_SEEDED_ARRAY_INDEX_HASH + +// static +void HashSeed::InitializeRoots(Isolate* isolate) { + DCHECK(!isolate->heap()->deserialization_complete()); + uint64_t seed; + if (v8_flags.hash_seed == 0) { + int64_t rnd = isolate->random_number_generator()->NextInt64(); + seed = static_cast(rnd); + } else { + seed = static_cast(v8_flags.hash_seed); + } + + // Write the seed and derived secrets into the read-only roots ByteArray. + Data* data = const_cast(HashSeed(isolate).data_); + +#if V8_USE_DEFAULT_HASHER_SECRET + // Copy from the default seed and just override the meta seed. + *data = kDefaultSeed; + data->seed = seed; +#else + data->seed = seed; + rapidhash_make_secret(seed, data->secrets); +#ifdef V8_ENABLE_SEEDED_ARRAY_INDEX_HASH + DeriveSecretsForArrayIndexHash(data); + DCHECK(is_modular_inverse(data->m1, data->m1_inv)); + DCHECK(is_modular_inverse(data->m2, data->m2_inv)); + DCHECK(is_modular_inverse(data->m3, data->m3_inv)); +#endif // V8_ENABLE_SEEDED_ARRAY_INDEX_HASH +#endif // V8_USE_DEFAULT_HASHER_SECRET +} + +} // namespace internal +} // namespace v8 diff --git a/deps/v8/src/numbers/hash-seed.h b/deps/v8/src/numbers/hash-seed.h new file mode 100644 index 000000000..7cabdf122 --- /dev/null +++ b/deps/v8/src/numbers/hash-seed.h @@ -0,0 +1,102 @@ +// Copyright 2019 the V8 project authors. 
All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef V8_NUMBERS_HASH_SEED_H_ +#define V8_NUMBERS_HASH_SEED_H_ + +#include +#include +#include + +#include "src/base/macros.h" + +namespace v8 { +namespace internal { + +class Isolate; +class LocalIsolate; +class ReadOnlyRoots; + +// A lightweight view over the hash_seed ByteArray in read-only roots. +class V8_EXPORT_PRIVATE HashSeed { + public: + inline explicit HashSeed(Isolate* isolate); + inline explicit HashSeed(LocalIsolate* isolate); + inline explicit HashSeed(ReadOnlyRoots roots); + + static inline HashSeed Default(); + + inline uint64_t seed() const; + inline const uint64_t* secret() const; + + bool operator==(const HashSeed& b) const { return data_ == b.data_; } + + static constexpr int kSecretsCount = 3; + + // The ReadOnlyRoots::hash_seed() byte array can be interpreted + // as a HashSeed::Data struct. + // Since this maps over either the read-only roots or over a static byte + // array, and in both cases, must be allocated at 8-byte boundaries, + // we don't use V8_OBJECT here. + struct Data { + // meta seed from --hash-seed, 0 = generate at startup + uint64_t seed; + // When V8_USE_DEFAULT_HASHER_SECRET is enabled, these are just + // RAPIDHASH_DEFAULT_SECRET. Otherwise they are derived from the seed + // using rapidhash_make_secret(). + uint64_t secrets[kSecretsCount]; + +#ifdef V8_ENABLE_SEEDED_ARRAY_INDEX_HASH + // Additional precomputed secrets for seeding the array index value hashes. 
+ uint32_t m1; // lower kArrayIndexValueBits bits of secret[0], must be odd + uint32_t m1_inv; // modular inverse of m1 mod 2^kArrayIndexValueBits + uint32_t m2; // lower kArrayIndexValueBits bits of secret[1], must be odd + uint32_t m2_inv; // modular inverse of m2 mod 2^kArrayIndexValueBits + uint32_t m3; // lower kArrayIndexValueBits bits of secret[2], must be odd + uint32_t m3_inv; // modular inverse of m3 mod 2^kArrayIndexValueBits +#endif // V8_ENABLE_SEEDED_ARRAY_INDEX_HASH + }; + + static constexpr int kTotalSize = sizeof(Data); + +#ifdef V8_ENABLE_SEEDED_ARRAY_INDEX_HASH + // Byte offsets from the data start, for CSA that loads fields at raw + // offsets from the ByteArray data start. + static constexpr int kDerivedM1Offset = offsetof(Data, m1); + static constexpr int kDerivedM1InvOffset = offsetof(Data, m1_inv); + static constexpr int kDerivedM2Offset = offsetof(Data, m2); + static constexpr int kDerivedM2InvOffset = offsetof(Data, m2_inv); + static constexpr int kDerivedM3Offset = offsetof(Data, m3); + static constexpr int kDerivedM3InvOffset = offsetof(Data, m3_inv); + + inline uint32_t m1() const; + inline uint32_t m1_inv() const; + inline uint32_t m2() const; + inline uint32_t m2_inv() const; + inline uint32_t m3() const; + inline uint32_t m3_inv() const; +#endif // V8_ENABLE_SEEDED_ARRAY_INDEX_HASH + + // Generates a hash seed (from --hash-seed or the RNG) and writes it + // together with derived secrets into the isolate's hash_seed in + // its read-only roots. + static void InitializeRoots(Isolate* isolate); + + private: + // Pointer into the Data overlaying the ByteArray data (either + // points to read-only roots or to kDefaultData). + const Data* data_; + explicit HashSeed(const Data* data) : data_(data) {} + + // Points to the static constexpr default seed. 
+ static const Data* const kDefaultData; +}; + +static_assert(std::is_trivially_copyable_v); +static_assert(alignof(HashSeed::Data) == alignof(uint64_t)); + +} // namespace internal +} // namespace v8 + +#endif // V8_NUMBERS_HASH_SEED_H_ diff --git a/deps/v8/src/objects/dictionary-inl.h b/deps/v8/src/objects/dictionary-inl.h index 05f774222..94fd1efc2 100644 --- a/deps/v8/src/objects/dictionary-inl.h +++ b/deps/v8/src/objects/dictionary-inl.h @@ -275,14 +275,14 @@ bool NumberDictionaryBaseShape::IsMatch(uint32_t key, Object other) { } uint32_t NumberDictionaryBaseShape::Hash(ReadOnlyRoots roots, uint32_t key) { - return ComputeSeededHash(key, HashSeed(roots)); + return ComputeSeededHash(key, HashSeed(roots).seed()); } uint32_t NumberDictionaryBaseShape::HashForObject(ReadOnlyRoots roots, Object other) { DCHECK(other.IsNumber()); return ComputeSeededHash(static_cast(other.Number()), - HashSeed(roots)); + HashSeed(roots).seed()); } Handle NumberDictionaryBaseShape::AsHandle(Isolate* isolate, diff --git a/deps/v8/src/objects/name.h b/deps/v8/src/objects/name.h index c2816d04c..d7a10bf6e 100644 --- a/deps/v8/src/objects/name.h +++ b/deps/v8/src/objects/name.h @@ -153,7 +153,19 @@ class Name : public TorqueGeneratedName { // For strings which are array indexes the hash value has the string length // mixed into the hash, mainly to avoid a hash value of zero which would be // the case for the string '0'. 24 bits are used for the array index value. - static const int kArrayIndexValueBits = 24; + static constexpr int kArrayIndexValueBits = 24; + // Mask for extracting the lower kArrayIndexValueBits of a value. + static constexpr uint32_t kArrayIndexValueMask = + (1u << kArrayIndexValueBits) - 1; +#ifdef V8_ENABLE_SEEDED_ARRAY_INDEX_HASH + // Half-width shift used by the seeded xorshift-multiply mixing. + static constexpr int kArrayIndexHashShift = kArrayIndexValueBits / 2; + // The shift must be at least the half width for the xorshift to be an + // involution. 
+ static_assert(kArrayIndexHashShift * 2 >= kArrayIndexValueBits, + "kArrayIndexHashShift must be at least half of " + "kArrayIndexValueBits"); +#endif // V8_ENABLE_SEEDED_ARRAY_INDEX_HASH static const int kArrayIndexLengthBits = kBitsPerInt - kArrayIndexValueBits - HashFieldTypeBits::kSize; diff --git a/deps/v8/src/objects/name.tq b/deps/v8/src/objects/name.tq index 81566e796..3b8ca328d 100644 --- a/deps/v8/src/objects/name.tq +++ b/deps/v8/src/objects/name.tq @@ -54,6 +54,9 @@ macro ContainsCachedArrayIndex(hash: uint32): bool { return (hash & kDoesNotContainCachedArrayIndexMask) == 0; } +extern macro SeedArrayIndexValue(uint32): uint32; +extern macro UnseedArrayIndexValue(uint32): uint32; + const kArrayIndexValueBitsShift: uint32 = kNofHashBitFields; const kArrayIndexLengthBitsShift: uint32 = kNofHashBitFields + kArrayIndexValueBits; @@ -71,8 +74,10 @@ macro IsIntegerIndex(hash: NameHash): bool { return hash.hash_field_type == HashFieldType::kIntegerIndex; } -macro MakeArrayIndexHash(value: uint32, length: uint32): NameHash { - // This is in sync with StringHasher::MakeArrayIndexHash. +// This is in sync with the private StringHasher::MakeArrayIndexHash without +// seeding. Do not call directly, use the @export MakeArrayIndexHash wrapper +// below. +macro MakeArrayIndexHashRaw(value: uint32, length: uint32): NameHash { dcheck(length <= kMaxArrayIndexSize); const one: uint32 = 1; dcheck(TenToThe(kMaxCachedArrayIndexLength) < (one << kArrayIndexValueBits)); @@ -86,3 +91,35 @@ macro MakeArrayIndexHash(value: uint32, length: uint32): NameHash { dcheck(IsIntegerIndex(hash)); return hash; } + +// This is in sync with the private StringHasher::DecodeArrayIndexFromHashField +// without seeding. Do not call directly, use the @export +// DecodeArrayIndexFromHashField wrapper below. 
+macro DecodeArrayIndexFromHashFieldRaw(rawHashField: uint32): uint32 { + const hash: NameHash = %RawDownCast(rawHashField); + dcheck(ContainsCachedArrayIndex(rawHashField) || IsIntegerIndex(hash)); + return hash.array_index_value; +} + +// Mirror C++ public StringHasher::MakeArrayIndexHash. +@export +macro MakeArrayIndexHash(value: uint32, length: uint32): NameHash { + @if(V8_ENABLE_SEEDED_ARRAY_INDEX_HASH) { + return MakeArrayIndexHashRaw(SeedArrayIndexValue(value), length); + } + @ifnot(V8_ENABLE_SEEDED_ARRAY_INDEX_HASH) { + return MakeArrayIndexHashRaw(value, length); + } +} + +// Mirror C++ public StringHasher::DecodeArrayIndexFromHashField. +@export +macro DecodeArrayIndexFromHashField(rawHashField: uint32): uint32 { + const value: uint32 = DecodeArrayIndexFromHashFieldRaw(rawHashField); + @if(V8_ENABLE_SEEDED_ARRAY_INDEX_HASH) { + return UnseedArrayIndexValue(value); + } + @ifnot(V8_ENABLE_SEEDED_ARRAY_INDEX_HASH) { + return value; + } +} diff --git a/deps/v8/src/objects/objects.cc b/deps/v8/src/objects/objects.cc index 43ea4a735..7285b88f3 100644 --- a/deps/v8/src/objects/objects.cc +++ b/deps/v8/src/objects/objects.cc @@ -4817,22 +4817,6 @@ Address JSArray::ArrayJoinConcatToSequentialString(Isolate* isolate, return dest.ptr(); } -uint32_t StringHasher::MakeArrayIndexHash(uint32_t value, int length) { - // For array indexes mix the length into the hash as an array index could - // be zero. 
- DCHECK_GT(length, 0); - DCHECK_LE(length, String::kMaxArrayIndexSize); - DCHECK(TenToThe(String::kMaxCachedArrayIndexLength) < - (1 << String::kArrayIndexValueBits)); - - value <<= String::ArrayIndexValueBits::kShift; - value |= length << String::ArrayIndexLengthBits::kShift; - - DCHECK(String::IsIntegerIndex(value)); - DCHECK_EQ(length <= String::kMaxCachedArrayIndexLength, - Name::ContainsCachedArrayIndex(value)); - return value; -} STATIC_ASSERT_FIELD_OFFSETS_EQUAL(HeapNumber::kValueOffset, Oddball::kToNumberRawOffset); diff --git a/deps/v8/src/objects/string-inl.h b/deps/v8/src/objects/string-inl.h index 41bba35d8..125106a58 100644 --- a/deps/v8/src/objects/string-inl.h +++ b/deps/v8/src/objects/string-inl.h @@ -353,7 +353,7 @@ Char FlatStringReader::Get(int index) const { template class SequentialStringKey final : public StringTableKey { public: - SequentialStringKey(const base::Vector& chars, uint64_t seed, + SequentialStringKey(const base::Vector& chars, const HashSeed seed, bool convert = false) : SequentialStringKey(StringHasher::HashSequentialString( chars.begin(), chars.length(), seed), @@ -766,13 +766,14 @@ String::FlatContent::~FlatContent() { #ifdef ENABLE_SLOW_DCHECKS uint32_t String::FlatContent::ComputeChecksum() const { - constexpr uint64_t hashseed = 1; uint32_t hash; if (state_ == ONE_BYTE) { - hash = StringHasher::HashSequentialString(onebyte_start, length_, hashseed); + hash = StringHasher::HashSequentialString(onebyte_start, length_, + HashSeed::Default()); } else { DCHECK_EQ(TWO_BYTE, state_); - hash = StringHasher::HashSequentialString(twobyte_start, length_, hashseed); + hash = StringHasher::HashSequentialString(twobyte_start, length_, + HashSeed::Default()); } DCHECK_NE(kChecksumVerificationDisabled, hash); return hash; @@ -1440,7 +1441,8 @@ bool String::AsArrayIndex(uint32_t* index) { DisallowGarbageCollection no_gc; uint32_t field = raw_hash_field(); if (ContainsCachedArrayIndex(field)) { - *index = ArrayIndexValueBits::decode(field); 
+ *index = StringHasher::DecodeArrayIndexFromHashField( + field, HashSeed(EarlyGetReadOnlyRoots())); return true; } if (IsHashFieldComputed(field) && !IsIntegerIndex(field)) { @@ -1452,7 +1454,8 @@ bool String::AsArrayIndex(uint32_t* index) { bool String::AsIntegerIndex(size_t* index) { uint32_t field = raw_hash_field(); if (ContainsCachedArrayIndex(field)) { - *index = ArrayIndexValueBits::decode(field); + *index = StringHasher::DecodeArrayIndexFromHashField( + field, HashSeed(EarlyGetReadOnlyRoots())); return true; } if (IsHashFieldComputed(field) && !IsIntegerIndex(field)) { diff --git a/deps/v8/src/objects/string-table.cc b/deps/v8/src/objects/string-table.cc index bf0f9fceb..4f02f7f42 100644 --- a/deps/v8/src/objects/string-table.cc +++ b/deps/v8/src/objects/string-table.cc @@ -728,7 +728,7 @@ Address StringTable::Data::TryStringToIndexOrLookupExisting(Isolate* isolate, return internalized.ptr(); } - uint64_t seed = HashSeed(isolate); + const HashSeed seed = HashSeed(isolate); std::unique_ptr buffer; const Char* chars; @@ -750,11 +750,13 @@ Address StringTable::Data::TryStringToIndexOrLookupExisting(Isolate* isolate, } // TODO(verwaest): Internalize to one-byte when possible. SequentialStringKey key(raw_hash_field, - base::Vector(chars, length), seed); + base::Vector(chars, length), + seed.seed()); // String could be an array index. if (Name::ContainsCachedArrayIndex(raw_hash_field)) { - return Smi::FromInt(String::ArrayIndexValueBits::decode(raw_hash_field)) + return Smi::FromInt(StringHasher::DecodeArrayIndexFromHashField( + raw_hash_field, seed)) .ptr(); } diff --git a/deps/v8/src/objects/string.cc b/deps/v8/src/objects/string.cc index e2f18a6e8..029285c51 100644 --- a/deps/v8/src/objects/string.cc +++ b/deps/v8/src/objects/string.cc @@ -818,7 +818,8 @@ Handle String::ToNumber(Isolate* isolate, Handle subject) { (len == 1 || data[0] != '0')) { // String hash is not calculated yet but all the data are present. 
// Update the hash field to speed up sequential convertions. - uint32_t raw_hash_field = StringHasher::MakeArrayIndexHash(d, len); + uint32_t raw_hash_field = + StringHasher::MakeArrayIndexHash(d, len, HashSeed(isolate)); #ifdef DEBUG subject->EnsureHash(); // Force hash calculation. DCHECK_EQ(subject->raw_hash_field(), raw_hash_field); @@ -1680,7 +1681,8 @@ bool String::IsIdentifier(Isolate* isolate, Handle str) { namespace { template -uint32_t HashString(String string, size_t start, int length, uint64_t seed, +uint32_t HashString(String string, size_t start, int length, + const HashSeed seed, PtrComprCageBase cage_base, const SharedStringAccessGuardIfNeeded& access_guard) { DisallowGarbageCollection no_gc; @@ -1726,7 +1728,7 @@ uint32_t String::ComputeAndSetRawHash( DCHECK_IMPLIES(!v8_flags.shared_string_table, !HasHashCode()); // Store the hash code in the object. - uint64_t seed = HashSeed(EarlyGetReadOnlyRoots()); + const HashSeed seed = HashSeed(EarlyGetReadOnlyRoots()); size_t start = 0; String string = *this; PtrComprCageBase cage_base = GetPtrComprCageBase(string); @@ -1772,7 +1774,8 @@ bool String::SlowAsArrayIndex(uint32_t* index) { if (length <= kMaxCachedArrayIndexLength) { uint32_t field = EnsureRawHash(); // Force computation of hash code. if (!IsIntegerIndex(field)) return false; - *index = ArrayIndexValueBits::decode(field); + *index = StringHasher::DecodeArrayIndexFromHashField( + field, HashSeed(EarlyGetReadOnlyRoots())); return true; } if (length == 0 || length > kMaxArrayIndexSize) return false; @@ -1786,7 +1789,8 @@ bool String::SlowAsIntegerIndex(size_t* index) { if (length <= kMaxCachedArrayIndexLength) { uint32_t field = EnsureRawHash(); // Force computation of hash code. 
if (!IsIntegerIndex(field)) return false; - *index = ArrayIndexValueBits::decode(field); + *index = StringHasher::DecodeArrayIndexFromHashField( + field, HashSeed(EarlyGetReadOnlyRoots())); return true; } if (length == 0 || length > kMaxIntegerIndexSize) return false; diff --git a/deps/v8/src/parsing/parse-info.h b/deps/v8/src/parsing/parse-info.h index 404fbf9ac..7ee106a68 100644 --- a/deps/v8/src/parsing/parse-info.h +++ b/deps/v8/src/parsing/parse-info.h @@ -12,6 +12,7 @@ #include "src/base/logging.h" #include "src/common/globals.h" #include "src/handles/handles.h" +#include "src/numbers/hash-seed.h" #include "src/objects/function-kind.h" #include "src/objects/function-syntax-kind.h" #include "src/objects/script.h" @@ -200,7 +201,7 @@ class V8_EXPORT_PRIVATE ReusableUnoptimizedCompileState { AstValueFactory* ast_value_factory() const { return ast_value_factory_.get(); } - uint64_t hash_seed() const { return hash_seed_; } + HashSeed hash_seed() const { return hash_seed_; } AccountingAllocator* allocator() const { return allocator_; } const AstStringConstants* ast_string_constants() const { return ast_string_constants_; @@ -210,7 +211,7 @@ class V8_EXPORT_PRIVATE ReusableUnoptimizedCompileState { LazyCompileDispatcher* dispatcher() const { return dispatcher_; } private: - uint64_t hash_seed_; + const HashSeed hash_seed_; AccountingAllocator* allocator_; V8FileLogger* v8_file_logger_; LazyCompileDispatcher* dispatcher_; @@ -245,7 +246,7 @@ class V8_EXPORT_PRIVATE ParseInfo { const UnoptimizedCompileFlags& flags() const { return flags_; } // Getters for reusable state. 
- uint64_t hash_seed() const { return reusable_state_->hash_seed(); } + HashSeed hash_seed() const { return reusable_state_->hash_seed(); } AccountingAllocator* allocator() const { return reusable_state_->allocator(); } diff --git a/deps/v8/src/profiler/heap-snapshot-generator-inl.h b/deps/v8/src/profiler/heap-snapshot-generator-inl.h index c1fdd65e5..3057a5e1d 100644 --- a/deps/v8/src/profiler/heap-snapshot-generator-inl.h +++ b/deps/v8/src/profiler/heap-snapshot-generator-inl.h @@ -55,8 +55,7 @@ Isolate* HeapEntry::isolate() const { return snapshot_->profiler()->isolate(); } uint32_t HeapSnapshotJSONSerializer::StringHash(const void* string) { const char* s = reinterpret_cast(string); int len = static_cast(strlen(s)); - return StringHasher::HashSequentialString(s, len, - v8::internal::kZeroHashSeed); + return StringHasher::HashSequentialString(s, len, HashSeed::Default()); } int HeapSnapshotJSONSerializer::to_node_index(const HeapEntry* e) { diff --git a/deps/v8/src/profiler/strings-storage.cc b/deps/v8/src/profiler/strings-storage.cc index a1bae849b..00eebd3ef 100644 --- a/deps/v8/src/profiler/strings-storage.cc +++ b/deps/v8/src/profiler/strings-storage.cc @@ -137,7 +137,7 @@ namespace { inline uint32_t ComputeStringHash(const char* str, int len) { uint32_t raw_hash_field = - StringHasher::HashSequentialString(str, len, kZeroHashSeed); + StringHasher::HashSequentialString(str, len, HashSeed::Default()); return Name::HashBits::decode(raw_hash_field); } diff --git a/deps/v8/src/runtime/runtime.cc b/deps/v8/src/runtime/runtime.cc index b878193ee..e98e7f32d 100644 --- a/deps/v8/src/runtime/runtime.cc +++ b/deps/v8/src/runtime/runtime.cc @@ -65,8 +65,8 @@ struct IntrinsicFunctionIdentifier { } uint32_t Hash() { - return StringHasher::HashSequentialString( - data_, length_, v8::internal::kZeroHashSeed); + return StringHasher::HashSequentialString(data_, length_, + HashSeed::Default()); } const unsigned char* data_; diff --git 
a/deps/v8/src/snapshot/read-only-deserializer.cc b/deps/v8/src/snapshot/read-only-deserializer.cc index ea86170a8..28d9af800 100644 --- a/deps/v8/src/snapshot/read-only-deserializer.cc +++ b/deps/v8/src/snapshot/read-only-deserializer.cc @@ -10,6 +10,7 @@ #include "src/heap/heap-inl.h" // crbug.com/v8/8499 #include "src/heap/read-only-heap.h" #include "src/logging/counters-scopes.h" +#include "src/numbers/hash-seed.h" #include "src/objects/slots.h" #include "src/roots/static-roots.h" #include "src/snapshot/snapshot-data.h" @@ -73,7 +74,7 @@ void ReadOnlyDeserializer::DeserializeIntoIsolate() { PostProcessNewObjectsIfStaticRootsEnabled(); if (should_rehash()) { - isolate()->heap()->InitializeHashSeed(); + HashSeed::InitializeRoots(isolate()); Rehash(); } } diff --git a/deps/v8/src/snapshot/shared-heap-deserializer.cc b/deps/v8/src/snapshot/shared-heap-deserializer.cc index f6cd57c90..2bcb10a10 100644 --- a/deps/v8/src/snapshot/shared-heap-deserializer.cc +++ b/deps/v8/src/snapshot/shared-heap-deserializer.cc @@ -24,7 +24,8 @@ void SharedHeapDeserializer::DeserializeIntoIsolate() { DeserializeDeferredObjects(); if (should_rehash()) { - // Hash seed was initialized in ReadOnlyDeserializer. + // The hash seed has already been initialized in ReadOnlyDeserializer, thus + // there is no need to call `HashSeed::InitializeRoots(isolate());`. Rehash(); } } diff --git a/deps/v8/src/strings/string-hasher-inl.h b/deps/v8/src/strings/string-hasher-inl.h index 4a6c7ada4..4909dd3e0 100644 --- a/deps/v8/src/strings/string-hasher-inl.h +++ b/deps/v8/src/strings/string-hasher-inl.h @@ -10,6 +10,7 @@ // Comment inserted to prevent header reordering. 
#include +#include "src/common/globals.h" #include "src/objects/name-inl.h" #include "src/objects/string-inl.h" #include "src/strings/char-predicates-inl.h" @@ -46,9 +47,89 @@ uint32_t StringHasher::GetTrivialHash(int length) { return String::CreateHashFieldValue(hash, String::HashFieldType::kHash); } +uint32_t StringHasher::MakeArrayIndexHash(uint32_t value, uint32_t length) { + // For array indexes mix the length into the hash as an array index could + // be zero. + DCHECK_LE(length, String::kMaxArrayIndexSize); + DCHECK(TenToThe(String::kMaxCachedArrayIndexLength) < + (1 << String::kArrayIndexValueBits)); + + value <<= String::ArrayIndexValueBits::kShift; + value |= length << String::ArrayIndexLengthBits::kShift; + + DCHECK(String::IsIntegerIndex(value)); + DCHECK_EQ(length <= String::kMaxCachedArrayIndexLength, + Name::ContainsCachedArrayIndex(value)); + return value; +} + +uint32_t StringHasher::DecodeArrayIndexFromHashField(uint32_t raw_hash_field) { + DCHECK(String::ContainsCachedArrayIndex(raw_hash_field) || + String::IsIntegerIndex(raw_hash_field)); + return String::ArrayIndexValueBits::decode(raw_hash_field); +} + +#ifdef V8_ENABLE_SEEDED_ARRAY_INDEX_HASH +uint32_t StringHasher::SeedArrayIndexValue(uint32_t value, + const HashSeed seed) { + uint32_t m1 = seed.m1(); + uint32_t m2 = seed.m2(); + uint32_t m3 = seed.m3(); + constexpr uint32_t kShift = Name::kArrayIndexHashShift; + constexpr uint32_t kMask = Name::kArrayIndexValueMask; + // 3-round xorshift-multiply. 
+ uint32_t x = value; + x ^= x >> kShift; + x = (x * m1) & kMask; + x ^= x >> kShift; + x = (x * m2) & kMask; + x ^= x >> kShift; + x = (x * m3) & kMask; + x ^= x >> kShift; + return x; +} + +uint32_t StringHasher::UnseedArrayIndexValue(uint32_t value, + const HashSeed seed) { + uint32_t m1_inv = seed.m1_inv(); + uint32_t m2_inv = seed.m2_inv(); + uint32_t m3_inv = seed.m3_inv(); + uint32_t x = value; + constexpr uint32_t kShift = Name::kArrayIndexHashShift; + constexpr uint32_t kMask = Name::kArrayIndexValueMask; + // 3-round xorshift-multiply (inverse). + // Xorshift is an involution when kShift is at least half of the value width. + x ^= x >> kShift; + x = (x * m3_inv) & kMask; + x ^= x >> kShift; + x = (x * m2_inv) & kMask; + x ^= x >> kShift; + x = (x * m1_inv) & kMask; + x ^= x >> kShift; + return x; +} +#endif // V8_ENABLE_SEEDED_ARRAY_INDEX_HASH + +uint32_t StringHasher::MakeArrayIndexHash( + uint32_t value, int length, [[maybe_unused]] const HashSeed seed) { +#ifdef V8_ENABLE_SEEDED_ARRAY_INDEX_HASH + value = SeedArrayIndexValue(value, seed); +#endif + return MakeArrayIndexHash(value, static_cast(length)); +} + +uint32_t StringHasher::DecodeArrayIndexFromHashField( + uint32_t raw_hash_field, [[maybe_unused]] const HashSeed seed) { + uint32_t value = DecodeArrayIndexFromHashField(raw_hash_field); +#ifdef V8_ENABLE_SEEDED_ARRAY_INDEX_HASH + value = UnseedArrayIndexValue(value, seed); +#endif + return value; +} + template uint32_t StringHasher::HashSequentialString(const char_t* chars_raw, int length, - uint64_t seed) { + const HashSeed seed) { static_assert(std::is_integral::value); static_assert(sizeof(char_t) <= 2); using uchar = typename std::make_unsigned::type; @@ -63,7 +144,7 @@ uint32_t StringHasher::HashSequentialString(const char_t* chars_raw, int length, int i = 1; do { if (i == length) { - return MakeArrayIndexHash(index, length); + return MakeArrayIndexHash(index, length, seed); } } while (TryAddArrayIndexChar(&index, chars[i++])); } @@ -80,7 
+161,7 @@ uint32_t StringHasher::HashSequentialString(const char_t* chars_raw, int length, // Perform a regular hash computation, and additionally check // if there are non-digit characters. String::HashFieldType type = String::HashFieldType::kIntegerIndex; - uint32_t running_hash = static_cast(seed); + uint32_t running_hash = static_cast(seed.seed()); uint64_t index_big = 0; const uchar* end = &chars[length]; while (chars != end) { @@ -112,7 +193,7 @@ uint32_t StringHasher::HashSequentialString(const char_t* chars_raw, int length, } // Non-index hash. - uint32_t running_hash = static_cast(seed); + uint32_t running_hash = static_cast(seed.seed()); const uchar* end = &chars[length]; while (chars != end) { running_hash = AddCharacterCore(running_hash, *chars++); diff --git a/deps/v8/src/strings/string-hasher.h b/deps/v8/src/strings/string-hasher.h index c26fe7942..970da500f 100644 --- a/deps/v8/src/strings/string-hasher.h +++ b/deps/v8/src/strings/string-hasher.h @@ -6,6 +6,7 @@ #define V8_STRINGS_STRING_HASHER_H_ #include "src/common/globals.h" +#include "src/numbers/hash-seed.h" namespace v8 { @@ -23,12 +24,25 @@ class V8_EXPORT_PRIVATE StringHasher final { StringHasher() = delete; template static inline uint32_t HashSequentialString(const char_t* chars, int length, - uint64_t seed); + const HashSeed seed); - // Calculated hash value for a string consisting of 1 to + // Calculate the hash value for a string consisting of 1 to // String::kMaxArrayIndexSize digits with no leading zeros (except "0"). - // value is represented decimal value. - static uint32_t MakeArrayIndexHash(uint32_t value, int length); + // + // The entire hash field consists of (from least significant bit to most): + // - HashFieldType::kIntegerIndex + // - kArrayIndexValueBits::kSize bits containing the hash value + // - The length of the decimal string + // + // When V8_ENABLE_SEEDED_ARRAY_INDEX_HASH is enabled, the numeric value + // is scrambled using secrets derived from the hash seed. 
When it's disabled + // the public overloads ignore the seed, whose retrieval should be optimized + // away in common configurations. + static V8_INLINE uint32_t MakeArrayIndexHash(uint32_t value, int length, + const HashSeed seed); + // Decode array index value from raw hash field and reverse seeding, if any. + static V8_INLINE uint32_t + DecodeArrayIndexFromHashField(uint32_t raw_hash_field, const HashSeed seed); // No string is allowed to have a hash of zero. That value is reserved // for internal properties. If the hash calculation yields zero then we @@ -40,14 +54,48 @@ class V8_EXPORT_PRIVATE StringHasher final { V8_INLINE static uint32_t GetHashCore(uint32_t running_hash); static inline uint32_t GetTrivialHash(int length); + + private: + // Raw encode/decode without seeding. Use the public overloads above. + static V8_INLINE uint32_t MakeArrayIndexHash(uint32_t value, uint32_t length); + static V8_INLINE uint32_t + DecodeArrayIndexFromHashField(uint32_t raw_hash_field); + +#ifdef V8_ENABLE_SEEDED_ARRAY_INDEX_HASH + // When V8_ENABLE_SEEDED_ARRAY_INDEX_HASH is enabled, the numeric value + // will be scrambled with 3 rounds of xorshift-multiply. + // + // x ^= x >> kShift; x = (x * m1) & kMask; // round 1 + // x ^= x >> kShift; x = (x * m2) & kMask; // round 2 + // x ^= x >> kShift; x = (x * m3) & kMask; // round 3 + // x ^= x >> kShift; // finalize + // + // To decode, apply the same steps with the modular inverses of m1, m2 + // and m3 in reverse order. + // + // x ^= x >> kShift; x = (x * m3_inv) & kMask; // round 1 + // x ^= x >> kShift; x = (x * m2_inv) & kMask; // round 2 + // x ^= x >> kShift; x = (x * m1_inv) & kMask; // round 3 + // x ^= x >> kShift; // finalize + // + // where kShift = kArrayIndexValueBits / 2, kMask = kArrayIndexValueMask, + // m1, m2, m3 (all odd) are derived from the Isolate's rapidhash secrets. + // m1_inv, m2_inv, m3_inv (modular inverses) are precomputed so that + // UnseedArrayIndexValue can quickly recover the original value. 
+ static V8_INLINE uint32_t SeedArrayIndexValue(uint32_t value, + const HashSeed seed); + // Decode array index value from seeded raw hash field. + static V8_INLINE uint32_t UnseedArrayIndexValue(uint32_t value, + const HashSeed seed); +#endif // V8_ENABLE_SEEDED_ARRAY_INDEX_HASH }; // Useful for std containers that require something ()'able. struct SeededStringHasher { - explicit SeededStringHasher(uint64_t hashseed) : hashseed_(hashseed) {} + explicit SeededStringHasher(const HashSeed hashseed) : hashseed_(hashseed) {} inline std::size_t operator()(const char* name) const; - uint64_t hashseed_; + const HashSeed hashseed_; }; // Useful for std containers that require something ()'able. diff --git a/deps/v8/src/torque/torque-parser.cc b/deps/v8/src/torque/torque-parser.cc index 2faee2488..1da0a693b 100644 --- a/deps/v8/src/torque/torque-parser.cc +++ b/deps/v8/src/torque/torque-parser.cc @@ -73,6 +73,11 @@ class BuildFlags : public base::ContextualClass { #endif build_flags_["V8_ENABLE_SANDBOX"] = V8_ENABLE_SANDBOX_BOOL; build_flags_["DEBUG"] = DEBUG_BOOL; +#ifdef V8_ENABLE_SEEDED_ARRAY_INDEX_HASH + build_flags_["V8_ENABLE_SEEDED_ARRAY_INDEX_HASH"] = true; +#else + build_flags_["V8_ENABLE_SEEDED_ARRAY_INDEX_HASH"] = false; +#endif } static bool GetFlag(const std::string& name, const char* production) { auto it = Get().build_flags_.find(name); diff --git a/deps/v8/src/utils/utils.h b/deps/v8/src/utils/utils.h index 478c03e62..446e58440 100644 --- a/deps/v8/src/utils/utils.h +++ b/deps/v8/src/utils/utils.h @@ -231,8 +231,6 @@ inline T RoundingAverageUnsigned(T a, T b) { // ---------------------------------------------------------------------------- // Hash function. -static const uint64_t kZeroHashSeed = 0; - // Thomas Wang, Integer Hash Functions. 
// http://www.concentric.net/~Ttwang/tech/inthash.htm inline uint32_t ComputeUnseededHash(uint32_t key) { diff --git a/deps/v8/src/wasm/wasm-module.cc b/deps/v8/src/wasm/wasm-module.cc index b0412620b..44697732a 100644 --- a/deps/v8/src/wasm/wasm-module.cc +++ b/deps/v8/src/wasm/wasm-module.cc @@ -662,7 +662,7 @@ int JumpTableOffset(const WasmModule* module, int func_index) { size_t GetWireBytesHash(base::Vector wire_bytes) { return StringHasher::HashSequentialString( reinterpret_cast(wire_bytes.begin()), wire_bytes.length(), - kZeroHashSeed); + HashSeed::Default()); } int NumFeedbackSlots(const WasmModule* module, int func_index) { diff --git a/deps/v8/third_party/rapidhash-v8/LICENSE b/deps/v8/third_party/rapidhash-v8/LICENSE new file mode 100644 index 000000000..e70352870 --- /dev/null +++ b/deps/v8/third_party/rapidhash-v8/LICENSE @@ -0,0 +1,34 @@ +/* + * rapidhash - Very fast, high quality, platform independant hashing algorithm. + * Copyright (C) 2024 Nicolas De Carli + * + * Based on 'wyhash', by Wang Yi + * + * BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php) + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * You can contact the author at: + * - rapidhash source repository: https://github.com/Nicoshev/rapidhash + */ diff --git a/deps/v8/third_party/rapidhash-v8/OWNERS b/deps/v8/third_party/rapidhash-v8/OWNERS new file mode 100644 index 000000000..6a5523a74 --- /dev/null +++ b/deps/v8/third_party/rapidhash-v8/OWNERS @@ -0,0 +1,2 @@ +leszeks@chromium.org +mlippautz@chromium.org diff --git a/deps/v8/third_party/rapidhash-v8/README.chromium b/deps/v8/third_party/rapidhash-v8/README.chromium new file mode 100644 index 000000000..be5b85de6 --- /dev/null +++ b/deps/v8/third_party/rapidhash-v8/README.chromium @@ -0,0 +1,20 @@ +Name: rapidhash +URL: https://github.com/Nicoshev/rapidhash/blob/master/rapidhash.h +Version: N/A +Date: 2024-07-08 +Revision: 588978411df8683777429f729be5213eb1bfd5f3 +License: BSD 2-clause +License File: LICENSE +Shipped: Yes +Security Critical: yes + +Description: +A fast hash function. + +This is a fork of upstream, with the parts that we don't need or want +removed. We do not intend to sync regularly with upstream git. This +particular version is copied over from Chromium. + +Local Modifications: +- Copied over from Chromium's third_party/rapidhash with all its changes. +- Removed base/ includes and replaced with V8 versions. 
diff --git a/deps/v8/third_party/rapidhash-v8/rapidhash.h b/deps/v8/third_party/rapidhash-v8/rapidhash.h new file mode 100644 index 000000000..c7980f1cf --- /dev/null +++ b/deps/v8/third_party/rapidhash-v8/rapidhash.h @@ -0,0 +1,285 @@ +/* + * rapidhash - Very fast, high quality, platform-independent hashing algorithm. + * Copyright (C) 2024 Nicolas De Carli + * + * Based on 'wyhash', by Wang Yi + * + * BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php) + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * You can contact the author at: + * - rapidhash source repository: https://github.com/Nicoshev/rapidhash + */ + +#ifndef _THIRD_PARTY_RAPIDHASH_H +#define _THIRD_PARTY_RAPIDHASH_H 1 + +#include +#include +#include + +#include +#include + +#include "include/v8config.h" +#include "src/base/logging.h" + +// Chromium has some local modifications to upstream rapidhash, +// mostly around the concept of HashReaders (including slightly +// more comments for ease of understanding). Generally, rapidhash +// hashes bytes without really caring what these bytes are, +// but often in Chromium, we want to hash _strings_, and strings +// can have multiple representations. In particular, the WTF +// StringImpl class (and by extension, String and AtomicString) +// can have three different states: +// +// 1. Latin1 (or ASCII) code points, in 8 bits per character (LChar). +// 2. The same code points, in 16 bits per character (UChar). +// 3. Strings actually including non-Latin1 code points, in 16 bits +// per character (UChar, UTF-16 encoding). +// +// The problem is that we'd like to hash case #1 and #2 to actually +// return the same hash. There are several ways we could deal with this: +// +// a) Just ignore the problem and hash everything as raw bytes; +// case #2 is fairly rare, and some algorithms (e.g. opportunistic +// deduplication) could live with some false negatives. +// b) Expand all strings to UTF-16 before hashing. This was the +// strategy for the old StringHasher (using SuperFastHash), +// as it naturally worked in 16-bit increments and it is probably +// the simplest. However, this both halves the throughput of the +// hasher and incurs conversion costs. +// c) Detect #2 and convert those cases to #1 (UTF-16 to Latin1 +// conversion), and apart from that, juts hash bytes. +// +// b) is used in e.g. CaseFoldingHash, but c) is the one we use the most +// often. Most strings that we want to hash are 8-bit (e.g. HTML tags), so +// that makes the common case fast. 
And for UChar strings, it is fairly fast +// to make a pre-pass over the string to check for the existence of any +// non-Latin1 characters. Of course, #1 and #3 can just be hashed as raw +// bytes, as strings from those groups can never be equal anyway. +// +// To support these 8-to-16 and 16-to-8 conversions, and also things like +// lowercasing on the fly, we have modified rapidhash to be templatized +// on a HashReader, supplying bytes to the hash function. For the default +// case of just hashing raw bytes, this is simply reading. But for other +// conversions, the reader is doing slightly more work, such as packing +// or unpacking. See e.g. ConvertTo8BitHashReader in string_hasher.h. +// +// Note that this reader could be made constexpr if we needed to evaluate +// hashes at compile-time. +struct PlainHashReader { + // If this is different from 1 (only 1, 2 and 4 are really reasonable + // options), it means the reader consumes its input at a faster rate than + // would be normally expected. For instance, if it is 2, it means that when + // the reader produces 64 bits, it needs to move its input 128 bits + // ahead. This is used when e.g. a reader converts from UTF-16 to ASCII, + // by removing zeros. The input length must divide the compression factor. + static constexpr unsigned kCompressionFactor = 1; + + // This is the opposite of kExpansionFactor. It does not make sense to have + // both kCompressionFactor and kExpansionFactor different from 1. + // The output length must divide the expansion factor. + static constexpr unsigned kExpansionFactor = 1; + + // Read 64 little-endian bits from the input, taking into account + // the expansion/compression if relevant. Unaligned reads must be + // supported. + static inline uint64_t Read64(const uint8_t* p) { + uint64_t v; + memcpy(&v, p, 8); + return v; + } + + // Similarly, read 32 little-endian (unaligned) bits. 
Note that + // this must return uint64_t, _not_ uint32_t, as the hasher assumes + // it can freely shift such numbers up and down. + static inline uint64_t Read32(const uint8_t* p) { + uint32_t v; + memcpy(&v, p, 4); + return v; + } + + // Read 1, 2 or 3 bytes from the input, and distribute them somewhat + // reasonably across the resulting 64-bit number. Note that this is + // done in a branch-free fashion, so that it always reads three bytes + // but never reads past the end. + // + // This is only used in the case where we hash a string of exactly + // 1, 2 or 3 bytes; if the hash is e.g. 7 bytes, two overlapping 32-bit + // reads will be done instead. Note that if you have kCompressionFactor == 2, + // this means that this will only ever be called with k = 2. + static inline uint64_t ReadSmall(const uint8_t* p, size_t k) { + return (uint64_t{p[0]} << 56) | (uint64_t{p[k >> 1]} << 32) | p[k - 1]; + } +}; + +/* + * Likely and unlikely macros. + */ +#define _likely_(x) __builtin_expect(x, 1) +#define _unlikely_(x) __builtin_expect(x, 0) + +/* + * 64*64 -> 128bit multiply function. + * + * @param A Address of 64-bit number. + * @param B Address of 64-bit number. + * + * Calculates 128-bit C = *A * *B. + */ +inline std::pair rapid_mul128(uint64_t A, uint64_t B) { +#if defined(__SIZEOF_INT128__) + __uint128_t r = A; + r *= B; + return {static_cast(r), static_cast(r >> 64)}; +#else + // High and low 32 bits of A and B. + uint64_t a_high = A >> 32, b_high = B >> 32, a_low = (uint32_t)A, + b_low = (uint32_t)B; + + // Intermediate products. + uint64_t result_high = a_high * b_high; + uint64_t result_m0 = a_high * b_low; + uint64_t result_m1 = b_high * a_low; + uint64_t result_low = a_low * b_low; + + // Final sum. The lower 64-bit addition can overflow twice, + // so accumulate carry as we go. + uint64_t high = result_high + (result_m0 >> 32) + (result_m1 >> 32); + uint64_t t = result_low + (result_m0 << 32); + high += (t < result_low); // Carry. 
+ uint64_t low = t + (result_m1 << 32); + high += (low < t); // Carry. + + return {low, high}; +#endif +} + +/* + * Multiply and xor mix function. + * + * @param A 64-bit number. + * @param B 64-bit number. + * + * Calculates 128-bit C = A * B. + * Returns 64-bit xor between high and low 64 bits of C. + */ +inline uint64_t rapid_mix(uint64_t A, uint64_t B) { + std::tie(A, B) = rapid_mul128(A, B); + return A ^ B; +} + +/* + * rapidhash main function. + * + * @param key Buffer to be hashed. + * @param len Number of input bytes coming from the reader. + * @param seed 64-bit seed used to alter the hash result predictably. + * @param secret Triplet of 64-bit secrets used to alter hash result + * predictably. + * + * Returns a 64-bit hash. + * + * The data flow is separated so that we never mix input data with pointers; + * + * a, b, seed, secret[0], secret[1], secret[2], see1 and see2 are affected + * by the input data. + * + * p is only ever indexed by len, delta (comes from len only), i (comes from + * len only) or integral constants. len is const, so no data can flow into it. + * + * No other reads from memory take place. No writes to memory take place. + */ +template +V8_INLINE uint64_t rapidhash(const uint8_t* p, const size_t len, uint64_t seed, + const uint64_t secret[3]) { + // For brevity. + constexpr unsigned x = Reader::kCompressionFactor; + constexpr unsigned y = Reader::kExpansionFactor; + DCHECK_EQ(len % y, 0u); + + seed ^= rapid_mix(seed ^ secret[0], secret[1]) ^ len; + uint64_t a, b; + if (_likely_(len <= 16)) { + if (_likely_(len >= 4)) { + // Read the first and last 32 bits (they may overlap). + const uint8_t* plast = p + (len - 4) * x / y; + a = (Reader::Read32(p) << 32) | Reader::Read32(plast); + + // This is equivalent to: delta = (len >= 8) ? 4 : 0; + const uint64_t delta = ((len & 24) >> (len >> 3)) * x / y; + b = ((Reader::Read32(p + delta) << 32) | Reader::Read32(plast - delta)); + } else if (_likely_(len > 0)) { + // 1, 2 or 3 bytes. 
+ a = Reader::ReadSmall(p, len); + b = 0; + } else { + a = b = 0; + } + } else { + size_t i = len; + if (_unlikely_(i > 48)) { + uint64_t see1 = seed, see2 = seed; + do { + seed = rapid_mix(Reader::Read64(p) ^ secret[0], + Reader::Read64(p + 8 * x / y) ^ seed); + see1 = rapid_mix(Reader::Read64(p + 16 * x / y) ^ secret[1], + Reader::Read64(p + 24 * x / y) ^ see1); + see2 = rapid_mix(Reader::Read64(p + 32 * x / y) ^ secret[2], + Reader::Read64(p + 40 * x / y) ^ see2); + p += 48 * x / y; + i -= 48; + } while (_likely_(i >= 48)); + seed ^= see1 ^ see2; + } + if (i > 16) { + seed = rapid_mix(Reader::Read64(p) ^ secret[2], + Reader::Read64(p + 8 * x / y) ^ seed ^ secret[1]); + if (i > 32) { + seed = rapid_mix(Reader::Read64(p + 16 * x / y) ^ secret[2], + Reader::Read64(p + 24 * x / y) ^ seed); + } + } + + // Read the last 2x64 bits. Note that due to the division by y, + // this must be a signed quantity (or the division would become + // unsigned, possibly moving the sign bit down into the address). + // Similarly for x. + a = Reader::Read64(p + (static_cast(i) - 16) * + static_cast(x) / static_cast(y)); + b = Reader::Read64(p + (static_cast(i) - 8) * + static_cast(x) / static_cast(y)); + } + a ^= secret[1]; + b ^= seed; + std::tie(a, b) = rapid_mul128(a, b); + return rapid_mix(a ^ secret[0] ^ len, b ^ secret[1]); +} + +#undef _likely_ +#undef _unlikely_ + +#endif // _THIRD_PARTY_RAPIDHASH_H diff --git a/deps/v8/third_party/rapidhash-v8/secret.h b/deps/v8/third_party/rapidhash-v8/secret.h new file mode 100644 index 000000000..021125bc4 --- /dev/null +++ b/deps/v8/third_party/rapidhash-v8/secret.h @@ -0,0 +1,198 @@ +/* + * This is free and unencumbered software released into the public domain. + * + * Anyone is free to copy, modify, publish, use, compile, sell, or + * distribute this software, either in source code form or as a compiled + * binary, for any purpose, commercial or non-commercial, and by any + * means. 
+ * + * In jurisdictions that recognize copyright laws, the author or authors + * of this software dedicate any and all copyright interest in the + * software to the public domain. We make this dedication for the benefit + * of the public at large and to the detriment of our heirs and + * successors. We intend this dedication to be an overt act of + * relinquishment in perpetuity of all present and future rights to this + * software under copyright law. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * For more information, please refer to + * + * author: 王一 Wang Yi + * contributors: Reini Urban, Dietrich Epp, Joshua Haberman, Tommy Ettinger, + * Daniel Lemire, Otmar Ertl, cocowalla, leo-yuriev, Diego Barrios Romero, + * paulie-g, dumblob, Yann Collet, ivte-ms, hyb, James Z.M. Gao, easyaspi314 + * (Devin), TheOneric + */ + +#ifndef _THIRD_PARTY_RAPIDHASH_SECRET_H +#define _THIRD_PARTY_RAPIDHASH_SECRET_H 1 + +#include "rapidhash.h" +#include "src/base/bits.h" + +// Default secret parameters. If we wanted to, we could generate our own +// versions of these at renderer startup in order to perturb the hash +// and make it more DoS-resistant (similar to what base/hash.h does), +// but generating new ones takes a little bit of time (~200 µs on a desktop +// machine as of 2024), and good-quality random numbers may not be copious +// from within the sandbox. The secret concept is inherited from wyhash, +// described by the wyhash author here: +// +// https://github.com/wangyi-fudan/wyhash/issues/139 +// +// The rules are: +// +// 1. 
Each byte must be “balanced”, i.e., have exactly 4 bits set. +// (This is trivially done by just precompting a LUT with the +// possible bytes and picking from those.) +// +// 2. Each 64-bit group should have a Hamming distance of 32 to +// all the others (i.e., popcount(secret[i] ^ secret[j]) == 32). +// This is just done by rejection sampling. +// +// 3. Each 64-bit group should be prime. It's not obvious that this +// is really needed for the hash, as opposed to wyrand which also +// uses the same secret, but according to the author, it is +// “a feeling to be perfect”. This naturally means the last byte +// cannot be divisible by 2, but apart from that, is easiest +// checked by testing a few small factors and then the Miller-Rabin +// test, which rejects nearly all bad candidates in the first iteration +// and is fast as long as we have 64x64 -> 128 bit muls and modulos. + +static constexpr uint64_t RAPIDHASH_DEFAULT_SECRET[3] = { + 0x2d358dccaa6c78a5ull, 0x8bb84b93962eacc9ull, 0x4b33a62ed433d4a3ull}; + +#if !V8_USE_DEFAULT_HASHER_SECRET + +namespace detail { + +static inline uint64_t wyrand(uint64_t* seed) { + *seed += 0x2d358dccaa6c78a5ull; + return rapid_mix(*seed, *seed ^ 0x8bb84b93962eacc9ull); +} + +static inline unsigned long long mul_mod(unsigned long long a, + unsigned long long b, + unsigned long long m) { + unsigned long long r = 0; + while (b) { + if (b & 1) { + unsigned long long r2 = r + a; + if (r2 < r) r2 -= m; + r = r2 % m; + } + b >>= 1; + if (b) { + unsigned long long a2 = a + a; + if (a2 < a) a2 -= m; + a = a2 % m; + } + } + return r; +} + +static inline unsigned long long pow_mod(unsigned long long a, + unsigned long long b, + unsigned long long m) { + unsigned long long r = 1; + while (b) { + if (b & 1) r = mul_mod(r, a, m); + b >>= 1; + if (b) a = mul_mod(a, a, m); + } + return r; +} + +static unsigned sprp(unsigned long long n, unsigned long long a) { + unsigned long long d = n - 1; + unsigned char s = 0; + while (!(d & 0xff)) { + d >>= 
8; + s += 8; + } + if (!(d & 0xf)) { + d >>= 4; + s += 4; + } + if (!(d & 0x3)) { + d >>= 2; + s += 2; + } + if (!(d & 0x1)) { + d >>= 1; + s += 1; + } + unsigned long long b = pow_mod(a, d, n); + if ((b == 1) || (b == (n - 1))) return 1; + unsigned char r; + for (r = 1; r < s; r++) { + b = mul_mod(b, b, n); + if (b <= 1) return 0; + if (b == (n - 1)) return 1; + } + return 0; +} + +static unsigned is_prime(unsigned long long n) { + if (n < 2 || !(n & 1)) return 0; + if (n < 4) return 1; + if (!sprp(n, 2)) return 0; + if (n < 2047) return 1; + if (!sprp(n, 3)) return 0; + if (!sprp(n, 5)) return 0; + if (!sprp(n, 7)) return 0; + if (!sprp(n, 11)) return 0; + if (!sprp(n, 13)) return 0; + if (!sprp(n, 17)) return 0; + if (!sprp(n, 19)) return 0; + if (!sprp(n, 23)) return 0; + if (!sprp(n, 29)) return 0; + if (!sprp(n, 31)) return 0; + if (!sprp(n, 37)) return 0; + return 1; +} + +} // namespace detail + +static inline void rapidhash_make_secret(uint64_t seed, uint64_t* secret) { + uint8_t c[] = {15, 23, 27, 29, 30, 39, 43, 45, 46, 51, 53, 54, + 57, 58, 60, 71, 75, 77, 78, 83, 85, 86, 89, 90, + 92, 99, 101, 102, 105, 106, 108, 113, 114, 116, 120, 135, + 139, 141, 142, 147, 149, 150, 153, 154, 156, 163, 165, 166, + 169, 170, 172, 177, 178, 180, 184, 195, 197, 198, 201, 202, + 204, 209, 210, 212, 216, 225, 226, 228, 232, 240}; + for (size_t i = 0; i < 3; i++) { + uint8_t ok; + do { + ok = 1; + secret[i] = 0; + for (size_t j = 0; j < 64; j += 8) + secret[i] |= static_cast(c[detail::wyrand(&seed) % sizeof(c)]) + << j; + if (secret[i] % 2 == 0) { + ok = 0; + continue; + } + for (size_t j = 0; j < i; j++) { + if (v8::base::bits::CountPopulation(secret[j] ^ secret[i]) != 32) { + ok = 0; + break; + } + } + if (ok && !detail::is_prime(secret[i])) { + ok = 0; + } + } while (!ok); + } +} + +#endif // !V8_USE_DEFAULT_HASHER_SECRET + +#endif // _THIRD_PARTY_RAPIDHASH_SECRET_H diff --git a/test/fixtures/array-hash-collision.js b/test/fixtures/array-hash-collision.js new file mode 
100644 index 000000000..b803de6b9 --- /dev/null +++ b/test/fixtures/array-hash-collision.js @@ -0,0 +1,27 @@ +'use strict'; + +// See https://hackerone.com/reports/3511792 + +const payload = []; +const val = 1234; +const MOD = 2 ** 19; +const CHN = 2 ** 17; +const REP = 2 ** 17; + +if (process.argv[2] === 'benign') { + for (let i = 0; i < CHN + REP; i++) { + payload.push(`${val + i}`); + } +} else { + let j = val + MOD; + for (let i = 1; i < CHN; i++) { + payload.push(`${j}`); + j = (j + i) % MOD; + } + for (let k = 0; k < REP; k++) { + payload.push(`${val}`); + } +} + +const string = JSON.stringify({ data: payload }); +JSON.parse(string); diff --git a/test/pummel/test-array-hash-collision.js b/test/pummel/test-array-hash-collision.js new file mode 100644 index 000000000..3910761b4 --- /dev/null +++ b/test/pummel/test-array-hash-collision.js @@ -0,0 +1,27 @@ +'use strict'; + +// This is a regression test for https://hackerone.com/reports/3511792 + +require('../common'); +const assert = require('assert'); +const { spawnSync } = require('child_process'); +const { performance } = require('perf_hooks'); +const fixtures = require('../common/fixtures'); + +const fixturePath = fixtures.path('array-hash-collision.js'); + +const t0 = performance.now(); +const benignResult = spawnSync(process.execPath, [fixturePath, 'benign']); +const benignTime = performance.now() - t0; +assert.strictEqual(benignResult.status, 0); +console.log(`Benign test completed in ${benignTime.toFixed(2)}ms.`); + +const t1 = performance.now(); +const maliciousResult = spawnSync(process.execPath, [fixturePath, 'malicious'], { + timeout: Math.ceil(benignTime * 10), +}); +const maliciousTime = performance.now() - t1; +console.log(`Malicious test completed in ${maliciousTime.toFixed(2)}ms.`); + +assert.strictEqual(maliciousResult.status, 0, 'Hash flooding regression detected: ' + + `Benign took ${benignTime}ms, malicious took more than ${maliciousTime}ms.`); diff --git a/tools/make-v8.sh b/tools/make-v8.sh 
index 8d8f090f0..103933e29 100755 --- a/tools/make-v8.sh +++ b/tools/make-v8.sh @@ -5,6 +5,8 @@ set -xe BUILD_ARCH_TYPE=$1 V8_BUILD_OPTIONS=$2 +EXTRA_V8_OPTS="v8_enable_static_roots=false v8_enable_seeded_array_index_hash=true v8_use_default_hasher_secret=false" + cd deps/v8 || exit find . -type d -name .git -print0 | xargs -0 rm -rf ../../tools/v8/fetch_deps.py . diff --git a/tools/v8_gypfiles/features.gypi b/tools/v8_gypfiles/features.gypi index f74f411e4..ac1bceb25 100644 --- a/tools/v8_gypfiles/features.gypi +++ b/tools/v8_gypfiles/features.gypi @@ -194,6 +194,9 @@ # Use Siphash as added protection against hash flooding attacks. 'v8_use_siphash%': 0, + # Enable seeded array index hash. + 'v8_enable_seeded_array_index_hash%': 1, + # Use Perfetto (https://perfetto.dev) as the default TracingController. Not # currently implemented. 'v8_use_perfetto%': 0, @@ -436,6 +439,9 @@ ['v8_use_siphash==1', { 'defines': ['V8_USE_SIPHASH',], }], + ['v8_enable_seeded_array_index_hash==1', { + 'defines': ['V8_ENABLE_SEEDED_ARRAY_INDEX_HASH',], + }], ['v8_enable_shared_ro_heap==1', { 'defines': ['V8_SHARED_RO_HEAP',], }], -- 2.30.2